1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             assert(VD == VD->getCanonicalDecl() &&
69                         "Canonical decl must be captured.");
70             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
71                             isCapturedVar(CGF, VD) ||
72                                 (CGF.CapturedStmtInfo &&
73                                  InlinedShareds.isGlobalVarCaptured(VD)),
74                             VD->getType().getNonReferenceType(), VK_LValue,
75                             SourceLocation());
76             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
77               return CGF.EmitLValue(&DRE).getAddress();
78             });
79           }
80         }
81         (void)InlinedShareds.Privatize();
82       }
83     }
84   }
85 };
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S,
100                         /*AsInlined=*/false,
101                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S,
116                         /*AsInlined=*/false,
117                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 };
119 
120 /// Private scope for OpenMP loop-based directives, that supports capturing
121 /// of used expression from loop statement.
122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
123   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
124     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
125       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
126         for (const auto *I : PreInits->decls())
127           CGF.EmitVarDecl(cast<VarDecl>(*I));
128       }
129     }
130   }
131 
132 public:
133   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
134       : CodeGenFunction::RunCleanupsScope(CGF) {
135     emitPreInitStmt(CGF, S);
136   }
137 };
138 
139 } // namespace
140 
/// Forward declaration of a file-local helper so it can be referenced before
/// its definition.
/// NOTE(review): judging by the name, this emits the code shared by OpenMP
/// target directives, with \p CodeGen producing the region body - confirm
/// against the definition.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);
144 
145 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
146   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
147     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
148       OrigVD = OrigVD->getCanonicalDecl();
149       bool IsCaptured =
150           LambdaCaptureFields.lookup(OrigVD) ||
151           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
152           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
153       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
154                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
155       return EmitLValue(&DRE);
156     }
157   }
158   return EmitLValue(E);
159 }
160 
161 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
162   auto &C = getContext();
163   llvm::Value *Size = nullptr;
164   auto SizeInChars = C.getTypeSizeInChars(Ty);
165   if (SizeInChars.isZero()) {
166     // getTypeSizeInChars() returns 0 for a VLA.
167     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
168       llvm::Value *ArraySize;
169       std::tie(ArraySize, Ty) = getVLASize(VAT);
170       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
171     }
172     SizeInChars = C.getTypeSizeInChars(Ty);
173     if (SizeInChars.isZero())
174       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
175     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
176   } else
177     Size = CGM.getSize(SizeInChars);
178   return Size;
179 }
180 
181 void CodeGenFunction::GenerateOpenMPCapturedVars(
182     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
183   const RecordDecl *RD = S.getCapturedRecordDecl();
184   auto CurField = RD->field_begin();
185   auto CurCap = S.captures().begin();
186   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
187                                                  E = S.capture_init_end();
188        I != E; ++I, ++CurField, ++CurCap) {
189     if (CurField->hasCapturedVLAType()) {
190       auto VAT = CurField->getCapturedVLAType();
191       auto *Val = VLASizeMap[VAT->getSizeExpr()];
192       CapturedVars.push_back(Val);
193     } else if (CurCap->capturesThis())
194       CapturedVars.push_back(CXXThisValue);
195     else if (CurCap->capturesVariableByCopy()) {
196       llvm::Value *CV =
197           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
198 
199       // If the field is not a pointer, we need to save the actual value
200       // and load it as a void pointer.
201       if (!CurField->getType()->isAnyPointerType()) {
202         auto &Ctx = getContext();
203         auto DstAddr = CreateMemTemp(
204             Ctx.getUIntPtrType(),
205             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
206         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
207 
208         auto *SrcAddrVal = EmitScalarConversion(
209             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
210             Ctx.getPointerType(CurField->getType()), SourceLocation());
211         LValue SrcLV =
212             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
213 
214         // Store the value using the source type pointer.
215         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
216 
217         // Load the value using the destination type pointer.
218         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
219       }
220       CapturedVars.push_back(CV);
221     } else {
222       assert(CurCap->capturesVariable() && "Expected capture by reference.");
223       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
224     }
225   }
226 }
227 
228 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
229                                     StringRef Name, LValue AddrLV,
230                                     bool isReferenceType = false) {
231   ASTContext &Ctx = CGF.getContext();
232 
233   auto *CastedPtr = CGF.EmitScalarConversion(
234       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
235       Ctx.getPointerType(DstType), SourceLocation());
236   auto TmpAddr =
237       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
238           .getAddress();
239 
240   // If we are dealing with references we need to return the address of the
241   // reference instead of the reference of the value.
242   if (isReferenceType) {
243     QualType RefType = Ctx.getLValueReferenceType(DstType);
244     auto *RefVal = TmpAddr.getPointer();
245     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
246     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
247     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
248   }
249 
250   return TmpAddr;
251 }
252 
253 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
254   if (T->isLValueReferenceType()) {
255     return C.getLValueReferenceType(
256         getCanonicalParamType(C, T.getNonReferenceType()),
257         /*SpelledAsLValue=*/false);
258   }
259   if (T->isPointerType())
260     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
261   if (auto *A = T->getAsArrayTypeUnsafe()) {
262     if (auto *VLA = dyn_cast<VariableArrayType>(A))
263       return getCanonicalParamType(C, VLA->getElementType());
264     else if (!A->isVariablyModifiedType())
265       return C.getCanonicalType(T);
266   }
267   return C.getCanonicalParamType(T);
268 }
269 
270 namespace {
271   /// Contains required data for proper outlined function codegen.
272   struct FunctionOptions {
273     /// Captured statement for which the function is generated.
274     const CapturedStmt *S = nullptr;
275     /// true if cast to/from  UIntPtr is required for variables captured by
276     /// value.
277     const bool UIntPtrCastRequired = true;
278     /// true if only casted arguments must be registered as local args or VLA
279     /// sizes.
280     const bool RegisterCastedArgsOnly = false;
281     /// Name of the generated function.
282     const StringRef FunctionName;
283     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
284                              bool RegisterCastedArgsOnly,
285                              StringRef FunctionName)
286         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
287           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
288           FunctionName(FunctionName) {}
289   };
290 }
291 
/// Creates the llvm::Function for the captured statement FO.S, starts its
/// emission in \p CGF and maps every captured entity to the argument that
/// carries it.
///
/// The parameter list consists of the captured declaration's parameters that
/// precede the context parameter, then one parameter per field of the
/// captured record (replacing the context parameter), then the parameters
/// that follow the context parameter.
///
/// \param CGF          Function emitter in which the new function is started.
/// \param Args         [out] Receives the parameter declarations described
///                     above.
/// \param LocalAddrs   [out] Maps an argument to the captured variable it
///                     carries (nullptr for 'this') and the address to use
///                     for it. With FO.RegisterCastedArgsOnly, by-reference
///                     captures and by-copy pointer captures are not
///                     recorded.
/// \param VLASizes     [out] Maps an argument to the VLA size expression it
///                     carries and the loaded size value.
/// \param CXXThisValue [out] Reset to nullptr, then set to the loaded 'this'
///                     pointer if 'this' is captured.
/// \param FO           Options: captured statement, function name and
///                     casting mode.
/// \returns The newly created function (internal linkage), with \p CGF left
///          positioned inside it.
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  // TargetArgs mirrors Args but may hold runtime-translated parameters; it is
  // the list actually used to create and start the function.
  FunctionArgList TargetArgs;
  // Parameters that precede the context parameter are copied as-is.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  // When no uintptr casting is requested, a dummy function declaration is
  // created to serve as the declaration context of the ParmVarDecls built
  // below for captured variables and 'this'.
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
        SourceLocation(), DeclarationName(), Ctx.VoidTy,
        Ctx.getTrivialTypeSourceInfo(
            Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
        SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  // First pass: create one parameter per captured field. Fields and captures
  // are walked in lockstep (FD / I).
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    // Pick the parameter name: the captured variable's name, "this" or "vla".
    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      // Real parameter declaration located at the captured variable (or at
      // the field for 'this').
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getLocStart() : FD->getLocStart(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  // Parameters that follow the context parameter are copied as-is.
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  // Second pass: compute the address (or VLA size) for every captured field.
  // Cnt indexes Args/TargetArgs, starting at the first per-field parameter.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      // The parameter was translated by the runtime; ask it for the address.
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // VLA size: undo the uintptr cast if one was applied, then record the
      // loaded size for the size expression.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), AlignmentSource::Decl);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      // Capture by reference: the argument holds a reference/pointer to the
      // variable, so load through it unless the variable itself is a
      // reference.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        // The address is re-created with the declared alignment of the
        // variable.
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      // Non-pointer capture by copy: always registered, casting back from
      // uintptr when required.
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var,
            FO.UIntPtrCastRequired
                ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
                                       ArgLVal, VarTy->isReferenceType())
                : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      // 'this' has no VarDecl, hence the nullptr entry.
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
478 
/// Emits the outlined function for captured statement \p S and returns it.
///
/// Without debug info (or with debug info below LimitedDebugInfo) a single
/// function named after the helper name is emitted, with by-value captures
/// passed as uintptr. Otherwise the body is emitted into a function named
/// "<helper name>_debug__" that takes non-casted parameters, and a wrapper
/// carrying the plain helper name is emitted which loads its arguments and
/// forwards them to the "_debug__" function through the OpenMP runtime's
/// emitOutlinedFunctionCall; in that case the wrapper is returned.
///
/// Requires CapturedStmtInfo to be set (asserted).
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  // Compose the function name; Buffer backs the StringRef handed to FO and
  // stays alive for the whole function.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  // Register the addresses of the captured variables; the entry without a
  // VarDecl (recorded for 'this') is skipped.
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  // Make the passed-in VLA sizes available for their size expressions.
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Emit the wrapper in a separate CodeGenFunction: it uses uintptr-casted
  // parameters and only records the casted arguments.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  // Build the call arguments for F from the wrapper's own parameters.
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      // Recorded argument: load it from the recorded address using the
      // captured variable's type, or the argument's type when no variable is
      // attached.
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        // VLA size: the value was already loaded by the prologue.
        CallArg = EI->second.second;
      else {
        // Any other parameter is loaded directly from its local slot.
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
553 
554 //===----------------------------------------------------------------------===//
555 //                              OpenMP Directive Emission
556 //===----------------------------------------------------------------------===//
557 void CodeGenFunction::EmitOMPAggregateAssign(
558     Address DestAddr, Address SrcAddr, QualType OriginalType,
559     const llvm::function_ref<void(Address, Address)> &CopyGen) {
560   // Perform element-by-element initialization.
561   QualType ElementTy;
562 
563   // Drill down to the base element type on both arrays.
564   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
565   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
566   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
567 
568   auto SrcBegin = SrcAddr.getPointer();
569   auto DestBegin = DestAddr.getPointer();
570   // Cast from pointer to array type to pointer to single element.
571   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
572   // The basic structure here is a while-do loop.
573   auto BodyBB = createBasicBlock("omp.arraycpy.body");
574   auto DoneBB = createBasicBlock("omp.arraycpy.done");
575   auto IsEmpty =
576       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
577   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
578 
579   // Enter the loop body, making that address the current address.
580   auto EntryBB = Builder.GetInsertBlock();
581   EmitBlock(BodyBB);
582 
583   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
584 
585   llvm::PHINode *SrcElementPHI =
586     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
587   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
588   Address SrcElementCurrent =
589       Address(SrcElementPHI,
590               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
591 
592   llvm::PHINode *DestElementPHI =
593     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
594   DestElementPHI->addIncoming(DestBegin, EntryBB);
595   Address DestElementCurrent =
596     Address(DestElementPHI,
597             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
598 
599   // Emit copy.
600   CopyGen(DestElementCurrent, SrcElementCurrent);
601 
602   // Shift the address forward by one element.
603   auto DestElementNext = Builder.CreateConstGEP1_32(
604       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
605   auto SrcElementNext = Builder.CreateConstGEP1_32(
606       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
607   // Check whether we've reached the end.
608   auto Done =
609       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
610   Builder.CreateCondBr(Done, DoneBB, BodyBB);
611   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
612   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
613 
614   // Done.
615   EmitBlock(DoneBB, /*IsFinished=*/true);
616 }
617 
618 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
619                                   Address SrcAddr, const VarDecl *DestVD,
620                                   const VarDecl *SrcVD, const Expr *Copy) {
621   if (OriginalType->isArrayType()) {
622     auto *BO = dyn_cast<BinaryOperator>(Copy);
623     if (BO && BO->getOpcode() == BO_Assign) {
624       // Perform simple memcpy for simple copying.
625       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
626     } else {
627       // For arrays with complex element types perform element by element
628       // copying.
629       EmitOMPAggregateAssign(
630           DestAddr, SrcAddr, OriginalType,
631           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
632             // Working with the single array element, so have to remap
633             // destination and source variables to corresponding array
634             // elements.
635             CodeGenFunction::OMPPrivateScope Remap(*this);
636             Remap.addPrivate(DestVD, [DestElement]() -> Address {
637               return DestElement;
638             });
639             Remap.addPrivate(
640                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
641             (void)Remap.Privatize();
642             EmitIgnoredExpr(Copy);
643           });
644     }
645   } else {
646     // Remap pseudo source variable to private copy.
647     CodeGenFunction::OMPPrivateScope Remap(*this);
648     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
649     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
650     (void)Remap.Privatize();
651     // Emit copying of the whole variable.
652     EmitIgnoredExpr(Copy);
653   }
654 }
655 
/// Emits the private copies required by the firstprivate clauses of \p D and
/// registers them in \p PrivateScope.
///
/// A variable is not given a private copy here when it is not also listed in
/// a lastprivate clause and the captured-statement field found for it in the
/// current context is the very field of \p D's own captured statement and is
/// not of reference type; such a variable is only recorded as handled.
///
/// \param D            Directive whose clauses are processed.
/// \param PrivateScope Scope that receives the original-variable to
///                     private-copy mappings.
/// \returns false if there is no insertion point; otherwise true iff at least
///          one processed firstprivate variable also appears in a lastprivate
///          clause (and the set of handled variables is non-empty).
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Canonical declarations of all variables listed in lastprivate clauses.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    // Note: this loop variable shadows the parameter D.
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  // Canonical declarations already handled, so a variable repeated across
  // clauses is emitted only once.
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  // Capture info built from D's own captured statement, used to compare
  // against the field known to the current CapturedStmtInfo.
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // The variable list, the private copies and the init helpers are walked
    // in lockstep.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        // No private copy is emitted for this variable; just mark it done.
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        // VD is the private copy, VDInit the helper variable that the
        // private copy's initializer reads from.
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        // Address of the original variable, computed before any remapping.
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              // Non-trivial construction: initialize element by element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    // Drop the temporary mapping of the helper variable.
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            // Drop the temporary mapping of the helper variable.
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
748 
749 void CodeGenFunction::EmitOMPPrivateClause(
750     const OMPExecutableDirective &D,
751     CodeGenFunction::OMPPrivateScope &PrivateScope) {
752   if (!HaveInsertPoint())
753     return;
754   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
755   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
756     auto IRef = C->varlist_begin();
757     for (auto IInit : C->private_copies()) {
758       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
759       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
760         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
761         bool IsRegistered =
762             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
763               // Emit private VarDecl with copy init.
764               EmitDecl(*VD);
765               return GetAddrOfLocalVar(VD);
766             });
767         assert(IsRegistered && "private var already registered as private");
768         // Silence the warning about unused variable.
769         (void)IsRegistered;
770       }
771       ++IRef;
772     }
773   }
774 }
775 
776 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
777   if (!HaveInsertPoint())
778     return false;
779   // threadprivate_var1 = master_threadprivate_var1;
780   // operator=(threadprivate_var2, master_threadprivate_var2);
781   // ...
782   // __kmpc_barrier(&loc, global_tid);
783   llvm::DenseSet<const VarDecl *> CopiedVars;
784   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
785   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
786     auto IRef = C->varlist_begin();
787     auto ISrcRef = C->source_exprs().begin();
788     auto IDestRef = C->destination_exprs().begin();
789     for (auto *AssignOp : C->assignment_ops()) {
790       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
791       QualType Type = VD->getType();
792       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
793         // Get the address of the master variable. If we are emitting code with
794         // TLS support, the address is passed from the master as field in the
795         // captured declaration.
796         Address MasterAddr = Address::invalid();
797         if (getLangOpts().OpenMPUseTLS &&
798             getContext().getTargetInfo().isTLSSupported()) {
799           assert(CapturedStmtInfo->lookup(VD) &&
800                  "Copyin threadprivates should have been captured!");
801           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
802                           VK_LValue, (*IRef)->getExprLoc());
803           MasterAddr = EmitLValue(&DRE).getAddress();
804           LocalDeclMap.erase(VD);
805         } else {
806           MasterAddr =
807             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
808                                         : CGM.GetAddrOfGlobal(VD),
809                     getContext().getDeclAlign(VD));
810         }
811         // Get the address of the threadprivate variable.
812         Address PrivateAddr = EmitLValue(*IRef).getAddress();
813         if (CopiedVars.size() == 1) {
814           // At first check if current thread is a master thread. If it is, no
815           // need to copy data.
816           CopyBegin = createBasicBlock("copyin.not.master");
817           CopyEnd = createBasicBlock("copyin.not.master.end");
818           Builder.CreateCondBr(
819               Builder.CreateICmpNE(
820                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
821                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
822               CopyBegin, CopyEnd);
823           EmitBlock(CopyBegin);
824         }
825         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
826         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
827         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
828       }
829       ++IRef;
830       ++ISrcRef;
831       ++IDestRef;
832     }
833   }
834   if (CopyEnd) {
835     // Exit out of copying procedure for non-master thread.
836     EmitBlock(CopyEnd, /*IsFinished=*/true);
837     return true;
838   }
839   return false;
840 }
841 
842 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
843     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
844   if (!HaveInsertPoint())
845     return false;
846   bool HasAtLeastOneLastprivate = false;
847   llvm::DenseSet<const VarDecl *> SIMDLCVs;
848   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
849     auto *LoopDirective = cast<OMPLoopDirective>(&D);
850     for (auto *C : LoopDirective->counters()) {
851       SIMDLCVs.insert(
852           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
853     }
854   }
855   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
856   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
857     HasAtLeastOneLastprivate = true;
858     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
859       break;
860     auto IRef = C->varlist_begin();
861     auto IDestRef = C->destination_exprs().begin();
862     for (auto *IInit : C->private_copies()) {
863       // Keep the address of the original variable for future update at the end
864       // of the loop.
865       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
866       // Taskloops do not require additional initialization, it is done in
867       // runtime support library.
868       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
869         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
870         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
871           DeclRefExpr DRE(
872               const_cast<VarDecl *>(OrigVD),
873               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
874                   OrigVD) != nullptr,
875               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
876           return EmitLValue(&DRE).getAddress();
877         });
878         // Check if the variable is also a firstprivate: in this case IInit is
879         // not generated. Initialization of this variable will happen in codegen
880         // for 'firstprivate' clause.
881         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
882           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
883           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
884             // Emit private VarDecl with copy init.
885             EmitDecl(*VD);
886             return GetAddrOfLocalVar(VD);
887           });
888           assert(IsRegistered &&
889                  "lastprivate var already registered as private");
890           (void)IsRegistered;
891         }
892       }
893       ++IRef;
894       ++IDestRef;
895     }
896   }
897   return HasAtLeastOneLastprivate;
898 }
899 
900 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
901     const OMPExecutableDirective &D, bool NoFinals,
902     llvm::Value *IsLastIterCond) {
903   if (!HaveInsertPoint())
904     return;
905   // Emit following code:
906   // if (<IsLastIterCond>) {
907   //   orig_var1 = private_orig_var1;
908   //   ...
909   //   orig_varn = private_orig_varn;
910   // }
911   llvm::BasicBlock *ThenBB = nullptr;
912   llvm::BasicBlock *DoneBB = nullptr;
913   if (IsLastIterCond) {
914     ThenBB = createBasicBlock(".omp.lastprivate.then");
915     DoneBB = createBasicBlock(".omp.lastprivate.done");
916     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
917     EmitBlock(ThenBB);
918   }
919   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
920   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
921   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
922     auto IC = LoopDirective->counters().begin();
923     for (auto F : LoopDirective->finals()) {
924       auto *D =
925           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
926       if (NoFinals)
927         AlreadyEmittedVars.insert(D);
928       else
929         LoopCountersAndUpdates[D] = F;
930       ++IC;
931     }
932   }
933   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
934     auto IRef = C->varlist_begin();
935     auto ISrcRef = C->source_exprs().begin();
936     auto IDestRef = C->destination_exprs().begin();
937     for (auto *AssignOp : C->assignment_ops()) {
938       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
939       QualType Type = PrivateVD->getType();
940       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
941       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
942         // If lastprivate variable is a loop control variable for loop-based
943         // directive, update its value before copyin back to original
944         // variable.
945         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
946           EmitIgnoredExpr(FinalExpr);
947         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
948         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
949         // Get the address of the original variable.
950         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
951         // Get the address of the private variable.
952         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
953         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
954           PrivateAddr =
955               Address(Builder.CreateLoad(PrivateAddr),
956                       getNaturalTypeAlignment(RefTy->getPointeeType()));
957         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
958       }
959       ++IRef;
960       ++ISrcRef;
961       ++IDestRef;
962     }
963     if (auto *PostUpdate = C->getPostUpdateExpr())
964       EmitIgnoredExpr(PostUpdate);
965   }
966   if (IsLastIterCond)
967     EmitBlock(DoneBB, /*IsFinished=*/true);
968 }
969 
/// Emits the private copies for the items of all 'reduction' clauses of \p D
/// and registers them, together with the implicit LHS/RHS helper variables of
/// each reduction operation, in \p PrivateScope.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Flatten the per-item expressions of all reduction clauses into parallel
  // lists, preserving clause order.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  // Count is the index of the current reduction item; it is the key for all
  // RedCG queries below.
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    // The callback passed to emitInitialization runs the regular initializer
    // of the private variable.
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    // Register the (adjusted) private address as the replacement for the base
    // declaration reported by RedCG for this item.
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    // Map the implicit LHS variable to the shared (original) storage and the
    // implicit RHS variable to the private copy. The three branches differ
    // only in whether the addresses are bitcast to the helper variables'
    // types.
    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      // NOTE(review): this redeclares 'Type' with the same value as the outer
      // variable of the same name; the shadowing looks unintentional.
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    // Step all parallel iterators and the item index together.
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1075 
1076 void CodeGenFunction::EmitOMPReductionClauseFinal(
1077     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1078   if (!HaveInsertPoint())
1079     return;
1080   llvm::SmallVector<const Expr *, 8> Privates;
1081   llvm::SmallVector<const Expr *, 8> LHSExprs;
1082   llvm::SmallVector<const Expr *, 8> RHSExprs;
1083   llvm::SmallVector<const Expr *, 8> ReductionOps;
1084   bool HasAtLeastOneReduction = false;
1085   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1086     HasAtLeastOneReduction = true;
1087     Privates.append(C->privates().begin(), C->privates().end());
1088     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1089     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1090     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1091   }
1092   if (HasAtLeastOneReduction) {
1093     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1094                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1095                       D.getDirectiveKind() == OMPD_simd;
1096     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd ||
1097                            D.getDirectiveKind() == OMPD_distribute_simd;
1098     // Emit nowait reduction if nowait clause is present or directive is a
1099     // parallel directive (it always has implicit barrier).
1100     CGM.getOpenMPRuntime().emitReduction(
1101         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1102         {WithNowait, SimpleReduction, ReductionKind});
1103   }
1104 }
1105 
1106 static void emitPostUpdateForReductionClause(
1107     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1108     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1109   if (!CGF.HaveInsertPoint())
1110     return;
1111   llvm::BasicBlock *DoneBB = nullptr;
1112   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1113     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1114       if (!DoneBB) {
1115         if (auto *Cond = CondGen(CGF)) {
1116           // If the first post-update expression is found, emit conditional
1117           // block if it was requested.
1118           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1119           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1120           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1121           CGF.EmitBlock(ThenBB);
1122         }
1123       }
1124       CGF.EmitIgnoredExpr(PostUpdate);
1125     }
1126   }
1127   if (DoneBB)
1128     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1129 }
1130 
1131 namespace {
1132 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1133 /// parallel function. This is necessary for combined constructs such as
1134 /// 'distribute parallel for'
1135 typedef llvm::function_ref<void(CodeGenFunction &,
1136                                 const OMPExecutableDirective &,
1137                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1138     CodeGenBoundParametersTy;
1139 } // anonymous namespace
1140 
1141 static void emitCommonOMPParallelDirective(
1142     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1143     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1144     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1145   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1146   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1147       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1148   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1149     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1150     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1151                                          /*IgnoreResultAssign*/ true);
1152     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1153         CGF, NumThreads, NumThreadsClause->getLocStart());
1154   }
1155   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1156     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1157     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1158         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1159   }
1160   const Expr *IfCond = nullptr;
1161   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1162     if (C->getNameModifier() == OMPD_unknown ||
1163         C->getNameModifier() == OMPD_parallel) {
1164       IfCond = C->getCondition();
1165       break;
1166     }
1167   }
1168 
1169   OMPParallelScope Scope(CGF, S);
1170   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1171   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1172   // lower and upper bounds with the pragma 'for' chunking mechanism.
1173   // The following lambda takes care of appending the lower and upper bound
1174   // parameters when necessary
1175   CodeGenBoundParameters(CGF, S, CapturedVars);
1176   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1177   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1178                                               CapturedVars, IfCond);
1179 }
1180 
/// Bound-parameters callback that appends nothing. It matches the signature of
/// CodeGenBoundParametersTy and is used when a parallel region needs no extra
/// bound arguments.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1184 
1185 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1186   // Emit parallel region as a standalone region.
1187   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1188     OMPPrivateScope PrivateScope(CGF);
1189     bool Copyins = CGF.EmitOMPCopyinClause(S);
1190     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1191     if (Copyins) {
1192       // Emit implicit barrier to synchronize threads and avoid data races on
1193       // propagation master's thread values of threadprivate variables to local
1194       // instances of that variables of all other implicit threads.
1195       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1196           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1197           /*ForceSimpleCall=*/true);
1198     }
1199     CGF.EmitOMPPrivateClause(S, PrivateScope);
1200     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1201     (void)PrivateScope.Privatize();
1202     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1203     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1204   };
1205   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1206                                  emitEmptyBoundParameters);
1207   emitPostUpdateForReductionClause(
1208       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1209 }
1210 
1211 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1212                                       JumpDest LoopExit) {
1213   RunCleanupsScope BodyScope(*this);
1214   // Update counters values on current iteration.
1215   for (auto I : D.updates()) {
1216     EmitIgnoredExpr(I);
1217   }
1218   // Update the linear variables.
1219   // In distribute directives only loop counters may be marked as linear, no
1220   // need to generate the code for them.
1221   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1222     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1223       for (auto *U : C->updates())
1224         EmitIgnoredExpr(U);
1225     }
1226   }
1227 
1228   // On a continue in the body, jump to the end.
1229   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1230   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1231   // Emit loop body.
1232   EmitStmt(D.getBody());
1233   // The end (updates/cleanups).
1234   EmitBlock(Continue.getBlock());
1235   BreakContinueStack.pop_back();
1236 }
1237 
1238 void CodeGenFunction::EmitOMPInnerLoop(
1239     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1240     const Expr *IncExpr,
1241     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1242     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1243   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1244 
1245   // Start the loop with a block that tests the condition.
1246   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1247   EmitBlock(CondBlock);
1248   const SourceRange &R = S.getSourceRange();
1249   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1250                  SourceLocToDebugLoc(R.getEnd()));
1251 
1252   // If there are any cleanups between here and the loop-exit scope,
1253   // create a block to stage a loop exit along.
1254   auto ExitBlock = LoopExit.getBlock();
1255   if (RequiresCleanup)
1256     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1257 
1258   auto LoopBody = createBasicBlock("omp.inner.for.body");
1259 
1260   // Emit condition.
1261   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1262   if (ExitBlock != LoopExit.getBlock()) {
1263     EmitBlock(ExitBlock);
1264     EmitBranchThroughCleanup(LoopExit);
1265   }
1266 
1267   EmitBlock(LoopBody);
1268   incrementProfileCounter(&S);
1269 
1270   // Create a block for the increment.
1271   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1272   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1273 
1274   BodyGen(*this);
1275 
1276   // Emit "IV = IV + 1" and a back-edge to the condition block.
1277   EmitBlock(Continue.getBlock());
1278   EmitIgnoredExpr(IncExpr);
1279   PostIncGen(*this);
1280   BreakContinueStack.pop_back();
1281   EmitBranch(CondBlock);
1282   LoopStack.pop();
1283   // Emit the fall-through block.
1284   EmitBlock(LoopExit.getBlock());
1285 }
1286 
1287 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1288   if (!HaveInsertPoint())
1289     return false;
1290   // Emit inits for the linear variables.
1291   bool HasLinears = false;
1292   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1293     for (auto *Init : C->inits()) {
1294       HasLinears = true;
1295       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1296       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1297         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1298         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1299         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1300                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1301                         VD->getInit()->getType(), VK_LValue,
1302                         VD->getInit()->getExprLoc());
1303         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1304                                                 VD->getType()),
1305                        /*capturedByInit=*/false);
1306         EmitAutoVarCleanups(Emission);
1307       } else
1308         EmitVarDecl(*VD);
1309     }
1310     // Emit the linear steps for the linear clauses.
1311     // If a step is not constant, it is pre-calculated before the loop.
1312     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1313       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1314         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1315         // Emit calculation of the linear step.
1316         EmitIgnoredExpr(CS);
1317       }
1318   }
1319   return HasLinears;
1320 }
1321 
1322 void CodeGenFunction::EmitOMPLinearClauseFinal(
1323     const OMPLoopDirective &D,
1324     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1325   if (!HaveInsertPoint())
1326     return;
1327   llvm::BasicBlock *DoneBB = nullptr;
1328   // Emit the final values of the linear variables.
1329   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1330     auto IC = C->varlist_begin();
1331     for (auto *F : C->finals()) {
1332       if (!DoneBB) {
1333         if (auto *Cond = CondGen(*this)) {
1334           // If the first post-update expression is found, emit conditional
1335           // block if it was requested.
1336           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1337           DoneBB = createBasicBlock(".omp.linear.pu.done");
1338           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1339           EmitBlock(ThenBB);
1340         }
1341       }
1342       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1343       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1344                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1345                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1346       Address OrigAddr = EmitLValue(&DRE).getAddress();
1347       CodeGenFunction::OMPPrivateScope VarScope(*this);
1348       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1349       (void)VarScope.Privatize();
1350       EmitIgnoredExpr(F);
1351       ++IC;
1352     }
1353     if (auto *PostUpdate = C->getPostUpdateExpr())
1354       EmitIgnoredExpr(PostUpdate);
1355   }
1356   if (DoneBB)
1357     EmitBlock(DoneBB, /*IsFinished=*/true);
1358 }
1359 
1360 static void emitAlignedClause(CodeGenFunction &CGF,
1361                               const OMPExecutableDirective &D) {
1362   if (!CGF.HaveInsertPoint())
1363     return;
1364   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1365     unsigned ClauseAlignment = 0;
1366     if (auto AlignmentExpr = Clause->getAlignment()) {
1367       auto AlignmentCI =
1368           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1369       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1370     }
1371     for (auto E : Clause->varlists()) {
1372       unsigned Alignment = ClauseAlignment;
1373       if (Alignment == 0) {
1374         // OpenMP [2.8.1, Description]
1375         // If no optional parameter is specified, implementation-defined default
1376         // alignments for SIMD instructions on the target platforms are assumed.
1377         Alignment =
1378             CGF.getContext()
1379                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1380                     E->getType()->getPointeeType()))
1381                 .getQuantity();
1382       }
1383       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1384              "alignment is not power of 2");
1385       if (Alignment != 0) {
1386         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1387         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1388       }
1389     }
1390   }
1391 }
1392 
/// Registers, in \p LoopScope, private replacements for the loop counters of
/// the loop directive \p S.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  // The private counters are walked in lock-step with the original counters.
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Map the original counter to the storage of its private counterpart,
    // allocating that storage first if it does not exist yet.
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    // If the original counter can be addressed here (it is a known local, a
    // captured variable, or has global storage), additionally register for the
    // private counter the address obtained from a reference to the original
    // counter.
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}
1424 
1425 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1426                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1427                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1428   if (!CGF.HaveInsertPoint())
1429     return;
1430   {
1431     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1432     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1433     (void)PreCondScope.Privatize();
1434     // Get initial values of real counters.
1435     for (auto I : S.inits()) {
1436       CGF.EmitIgnoredExpr(I);
1437     }
1438   }
1439   // Check that loop is executed at least one time.
1440   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1441 }
1442 
1443 void CodeGenFunction::EmitOMPLinearClause(
1444     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1445   if (!HaveInsertPoint())
1446     return;
1447   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1448   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1449     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1450     for (auto *C : LoopDirective->counters()) {
1451       SIMDLCVs.insert(
1452           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1453     }
1454   }
1455   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1456     auto CurPrivate = C->privates().begin();
1457     for (auto *E : C->varlists()) {
1458       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1459       auto *PrivateVD =
1460           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1461       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1462         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1463           // Emit private VarDecl with copy init.
1464           EmitVarDecl(*PrivateVD);
1465           return GetAddrOfLocalVar(PrivateVD);
1466         });
1467         assert(IsRegistered && "linear var already registered as private");
1468         // Silence the warning about unused variable.
1469         (void)IsRegistered;
1470       } else
1471         EmitVarDecl(*PrivateVD);
1472       ++CurPrivate;
1473     }
1474   }
1475 }
1476 
1477 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1478                                      const OMPExecutableDirective &D,
1479                                      bool IsMonotonic) {
1480   if (!CGF.HaveInsertPoint())
1481     return;
1482   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1483     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1484                                  /*ignoreResult=*/true);
1485     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1486     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1487     // In presence of finite 'safelen', it may be unsafe to mark all
1488     // the memory instructions parallel, because loop-carried
1489     // dependences of 'safelen' iterations are possible.
1490     if (!IsMonotonic)
1491       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1492   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1493     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1494                                  /*ignoreResult=*/true);
1495     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1496     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1497     // In presence of finite 'safelen', it may be unsafe to mark all
1498     // the memory instructions parallel, because loop-carried
1499     // dependences of 'safelen' iterations are possible.
1500     CGF.LoopStack.setParallel(false);
1501   }
1502 }
1503 
1504 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1505                                       bool IsMonotonic) {
1506   // Walk clauses and process safelen/lastprivate.
1507   LoopStack.setParallel(!IsMonotonic);
1508   LoopStack.setVectorizeEnable(true);
1509   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1510 }
1511 
1512 void CodeGenFunction::EmitOMPSimdFinal(
1513     const OMPLoopDirective &D,
1514     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1515   if (!HaveInsertPoint())
1516     return;
1517   llvm::BasicBlock *DoneBB = nullptr;
1518   auto IC = D.counters().begin();
1519   auto IPC = D.private_counters().begin();
1520   for (auto F : D.finals()) {
1521     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1522     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1523     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1524     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1525         OrigVD->hasGlobalStorage() || CED) {
1526       if (!DoneBB) {
1527         if (auto *Cond = CondGen(*this)) {
1528           // If the first post-update expression is found, emit conditional
1529           // block if it was requested.
1530           auto *ThenBB = createBasicBlock(".omp.final.then");
1531           DoneBB = createBasicBlock(".omp.final.done");
1532           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1533           EmitBlock(ThenBB);
1534         }
1535       }
1536       Address OrigAddr = Address::invalid();
1537       if (CED)
1538         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1539       else {
1540         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1541                         /*RefersToEnclosingVariableOrCapture=*/false,
1542                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1543         OrigAddr = EmitLValue(&DRE).getAddress();
1544       }
1545       OMPPrivateScope VarScope(*this);
1546       VarScope.addPrivate(OrigVD,
1547                           [OrigAddr]() -> Address { return OrigAddr; });
1548       (void)VarScope.Privatize();
1549       EmitIgnoredExpr(F);
1550     }
1551     ++IC;
1552     ++IPC;
1553   }
1554   if (DoneBB)
1555     EmitBlock(DoneBB, /*IsFinished=*/true);
1556 }
1557 
1558 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1559                                          const OMPLoopDirective &S,
1560                                          CodeGenFunction::JumpDest LoopExit) {
1561   CGF.EmitOMPLoopBody(S, LoopExit);
1562   CGF.EmitStopPoint(&S);
1563 }
1564 
1565 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
1566                               PrePostActionTy &Action) {
1567   Action.Enter(CGF);
1568   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
1569          "Expected simd directive");
1570   OMPLoopScope PreInitScope(CGF, S);
1571   // if (PreCond) {
1572   //   for (IV in 0..LastIteration) BODY;
1573   //   <Final counter/linear vars updates>;
1574   // }
1575   //
1576 
1577   // Emit: if (PreCond) - begin.
1578   // If the condition constant folds and can be elided, avoid emitting the
1579   // whole loop.
1580   bool CondConstant;
1581   llvm::BasicBlock *ContBlock = nullptr;
1582   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1583     if (!CondConstant)
1584       return;
1585   } else {
1586     auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1587     ContBlock = CGF.createBasicBlock("simd.if.end");
1588     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1589                 CGF.getProfileCount(&S));
1590     CGF.EmitBlock(ThenBlock);
1591     CGF.incrementProfileCounter(&S);
1592   }
1593 
1594   // Emit the loop iteration variable.
1595   const Expr *IVExpr = S.getIterationVariable();
1596   const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1597   CGF.EmitVarDecl(*IVDecl);
1598   CGF.EmitIgnoredExpr(S.getInit());
1599 
1600   // Emit the iterations count variable.
1601   // If it is not a variable, Sema decided to calculate iterations count on
1602   // each iteration (e.g., it is foldable into a constant).
1603   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1604     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1605     // Emit calculation of the iterations count.
1606     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1607   }
1608 
1609   CGF.EmitOMPSimdInit(S);
1610 
1611   emitAlignedClause(CGF, S);
1612   (void)CGF.EmitOMPLinearClauseInit(S);
1613   {
1614     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
1615     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1616     CGF.EmitOMPLinearClause(S, LoopScope);
1617     CGF.EmitOMPPrivateClause(S, LoopScope);
1618     CGF.EmitOMPReductionClauseInit(S, LoopScope);
1619     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1620     (void)LoopScope.Privatize();
1621     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1622                          S.getInc(),
1623                          [&S](CodeGenFunction &CGF) {
1624                            CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
1625                            CGF.EmitStopPoint(&S);
1626                          },
1627                          [](CodeGenFunction &) {});
1628     CGF.EmitOMPSimdFinal(
1629         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1630     // Emit final copy of the lastprivate variables at the end of loops.
1631     if (HasLastprivateClause)
1632       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1633     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1634     emitPostUpdateForReductionClause(
1635         CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1636   }
1637   CGF.EmitOMPLinearClauseFinal(
1638       S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1639   // Emit: if (PreCond) - end.
1640   if (ContBlock) {
1641     CGF.EmitBranch(ContBlock);
1642     CGF.EmitBlock(ContBlock, true);
1643   }
1644 }
1645 
1646 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1647   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1648     emitOMPSimdRegion(CGF, S, Action);
1649   };
1650   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1651   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1652 }
1653 
1654 void CodeGenFunction::EmitOMPOuterLoop(
1655     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1656     CodeGenFunction::OMPPrivateScope &LoopScope,
1657     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1658     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1659     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1660   auto &RT = CGM.getOpenMPRuntime();
1661 
1662   const Expr *IVExpr = S.getIterationVariable();
1663   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1664   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1665 
1666   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1667 
1668   // Start the loop with a block that tests the condition.
1669   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1670   EmitBlock(CondBlock);
1671   const SourceRange &R = S.getSourceRange();
1672   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1673                  SourceLocToDebugLoc(R.getEnd()));
1674 
1675   llvm::Value *BoolCondVal = nullptr;
1676   if (!DynamicOrOrdered) {
1677     // UB = min(UB, GlobalUB) or
1678     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1679     // 'distribute parallel for')
1680     EmitIgnoredExpr(LoopArgs.EUB);
1681     // IV = LB
1682     EmitIgnoredExpr(LoopArgs.Init);
1683     // IV < UB
1684     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1685   } else {
1686     BoolCondVal =
1687         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1688                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1689   }
1690 
1691   // If there are any cleanups between here and the loop-exit scope,
1692   // create a block to stage a loop exit along.
1693   auto ExitBlock = LoopExit.getBlock();
1694   if (LoopScope.requiresCleanups())
1695     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1696 
1697   auto LoopBody = createBasicBlock("omp.dispatch.body");
1698   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1699   if (ExitBlock != LoopExit.getBlock()) {
1700     EmitBlock(ExitBlock);
1701     EmitBranchThroughCleanup(LoopExit);
1702   }
1703   EmitBlock(LoopBody);
1704 
1705   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1706   // LB for loop condition and emitted it above).
1707   if (DynamicOrOrdered)
1708     EmitIgnoredExpr(LoopArgs.Init);
1709 
1710   // Create a block for the increment.
1711   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1712   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1713 
1714   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1715   // with dynamic/guided scheduling and without ordered clause.
1716   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1717     LoopStack.setParallel(!IsMonotonic);
1718   else
1719     EmitOMPSimdInit(S, IsMonotonic);
1720 
1721   SourceLocation Loc = S.getLocStart();
1722 
1723   // when 'distribute' is not combined with a 'for':
1724   // while (idx <= UB) { BODY; ++idx; }
1725   // when 'distribute' is combined with a 'for'
1726   // (e.g. 'distribute parallel for')
1727   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1728   EmitOMPInnerLoop(
1729       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1730       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1731         CodeGenLoop(CGF, S, LoopExit);
1732       },
1733       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1734         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1735       });
1736 
1737   EmitBlock(Continue.getBlock());
1738   BreakContinueStack.pop_back();
1739   if (!DynamicOrOrdered) {
1740     // Emit "LB = LB + Stride", "UB = UB + Stride".
1741     EmitIgnoredExpr(LoopArgs.NextLB);
1742     EmitIgnoredExpr(LoopArgs.NextUB);
1743   }
1744 
1745   EmitBranch(CondBlock);
1746   LoopStack.pop();
1747   // Emit the fall-through block.
1748   EmitBlock(LoopExit.getBlock());
1749 
1750   // Tell the runtime we are done.
1751   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1752     if (!DynamicOrOrdered)
1753       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1754                                                      S.getDirectiveKind());
1755   };
1756   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1757 }
1758 
1759 void CodeGenFunction::EmitOMPForOuterLoop(
1760     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1761     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1762     const OMPLoopArguments &LoopArgs,
1763     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1764   auto &RT = CGM.getOpenMPRuntime();
1765 
1766   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1767   const bool DynamicOrOrdered =
1768       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1769 
1770   assert((Ordered ||
1771           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1772                                  LoopArgs.Chunk != nullptr)) &&
1773          "static non-chunked schedule does not need outer loop");
1774 
1775   // Emit outer loop.
1776   //
1777   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1778   // When schedule(dynamic,chunk_size) is specified, the iterations are
1779   // distributed to threads in the team in chunks as the threads request them.
1780   // Each thread executes a chunk of iterations, then requests another chunk,
1781   // until no chunks remain to be distributed. Each chunk contains chunk_size
1782   // iterations, except for the last chunk to be distributed, which may have
1783   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1784   //
1785   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1786   // to threads in the team in chunks as the executing threads request them.
1787   // Each thread executes a chunk of iterations, then requests another chunk,
1788   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1789   // each chunk is proportional to the number of unassigned iterations divided
1790   // by the number of threads in the team, decreasing to 1. For a chunk_size
1791   // with value k (greater than 1), the size of each chunk is determined in the
1792   // same way, with the restriction that the chunks do not contain fewer than k
1793   // iterations (except for the last chunk to be assigned, which may have fewer
1794   // than k iterations).
1795   //
1796   // When schedule(auto) is specified, the decision regarding scheduling is
1797   // delegated to the compiler and/or runtime system. The programmer gives the
1798   // implementation the freedom to choose any possible mapping of iterations to
1799   // threads in the team.
1800   //
1801   // When schedule(runtime) is specified, the decision regarding scheduling is
1802   // deferred until run time, and the schedule and chunk size are taken from the
1803   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1804   // implementation defined
1805   //
1806   // while(__kmpc_dispatch_next(&LB, &UB)) {
1807   //   idx = LB;
1808   //   while (idx <= UB) { BODY; ++idx;
1809   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1810   //   } // inner loop
1811   // }
1812   //
1813   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1814   // When schedule(static, chunk_size) is specified, iterations are divided into
1815   // chunks of size chunk_size, and the chunks are assigned to the threads in
1816   // the team in a round-robin fashion in the order of the thread number.
1817   //
1818   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1819   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1820   //   LB = LB + ST;
1821   //   UB = UB + ST;
1822   // }
1823   //
1824 
1825   const Expr *IVExpr = S.getIterationVariable();
1826   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1827   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1828 
1829   if (DynamicOrOrdered) {
1830     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1831     llvm::Value *LBVal = DispatchBounds.first;
1832     llvm::Value *UBVal = DispatchBounds.second;
1833     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1834                                                              LoopArgs.Chunk};
1835     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1836                            IVSigned, Ordered, DipatchRTInputValues);
1837   } else {
1838     CGOpenMPRuntime::StaticRTInput StaticInit(
1839         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1840         LoopArgs.ST, LoopArgs.Chunk);
1841     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1842                          ScheduleKind, StaticInit);
1843   }
1844 
1845   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1846                                     const unsigned IVSize,
1847                                     const bool IVSigned) {
1848     if (Ordered) {
1849       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1850                                                             IVSigned);
1851     }
1852   };
1853 
1854   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1855                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1856   OuterLoopArgs.IncExpr = S.getInc();
1857   OuterLoopArgs.Init = S.getInit();
1858   OuterLoopArgs.Cond = S.getCond();
1859   OuterLoopArgs.NextLB = S.getNextLowerBound();
1860   OuterLoopArgs.NextUB = S.getNextUpperBound();
1861   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1862                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1863 }
1864 
1865 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1866                              const unsigned IVSize, const bool IVSigned) {}
1867 
1868 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1869     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1870     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1871     const CodeGenLoopTy &CodeGenLoopContent) {
1872 
1873   auto &RT = CGM.getOpenMPRuntime();
1874 
1875   // Emit outer loop.
1876   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1877   // dynamic
1878   //
1879 
1880   const Expr *IVExpr = S.getIterationVariable();
1881   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1882   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1883 
1884   CGOpenMPRuntime::StaticRTInput StaticInit(
1885       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1886       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1887   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1888 
1889   // for combined 'distribute' and 'for' the increment expression of distribute
1890   // is store in DistInc. For 'distribute' alone, it is in Inc.
1891   Expr *IncExpr;
1892   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1893     IncExpr = S.getDistInc();
1894   else
1895     IncExpr = S.getInc();
1896 
1897   // this routine is shared by 'omp distribute parallel for' and
1898   // 'omp distribute': select the right EUB expression depending on the
1899   // directive
1900   OMPLoopArguments OuterLoopArgs;
1901   OuterLoopArgs.LB = LoopArgs.LB;
1902   OuterLoopArgs.UB = LoopArgs.UB;
1903   OuterLoopArgs.ST = LoopArgs.ST;
1904   OuterLoopArgs.IL = LoopArgs.IL;
1905   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1906   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1907                           ? S.getCombinedEnsureUpperBound()
1908                           : S.getEnsureUpperBound();
1909   OuterLoopArgs.IncExpr = IncExpr;
1910   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1911                            ? S.getCombinedInit()
1912                            : S.getInit();
1913   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1914                            ? S.getCombinedCond()
1915                            : S.getCond();
1916   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1917                              ? S.getCombinedNextLowerBound()
1918                              : S.getNextLowerBound();
1919   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1920                              ? S.getCombinedNextUpperBound()
1921                              : S.getNextUpperBound();
1922 
1923   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1924                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1925                    emitEmptyOrdered);
1926 }
1927 
1928 /// Emit a helper variable and return corresponding lvalue.
1929 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1930                                const DeclRefExpr *Helper) {
1931   auto VDecl = cast<VarDecl>(Helper->getDecl());
1932   CGF.EmitVarDecl(*VDecl);
1933   return CGF.EmitLValue(Helper);
1934 }
1935 
1936 static std::pair<LValue, LValue>
1937 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1938                                      const OMPExecutableDirective &S) {
1939   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1940   LValue LB =
1941       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1942   LValue UB =
1943       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1944 
1945   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1946   // parallel for') we need to use the 'distribute'
1947   // chunk lower and upper bounds rather than the whole loop iteration
1948   // space. These are parameters to the outlined function for 'parallel'
1949   // and we copy the bounds of the previous schedule into the
1950   // the current ones.
1951   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1952   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1953   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1954   PrevLBVal = CGF.EmitScalarConversion(
1955       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1956       LS.getIterationVariable()->getType(), SourceLocation());
1957   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1958   PrevUBVal = CGF.EmitScalarConversion(
1959       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1960       LS.getIterationVariable()->getType(), SourceLocation());
1961 
1962   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1963   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1964 
1965   return {LB, UB};
1966 }
1967 
1968 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1969 /// we need to use the LB and UB expressions generated by the worksharing
1970 /// code generation support, whereas in non combined situations we would
1971 /// just emit 0 and the LastIteration expression
1972 /// This function is necessary due to the difference of the LB and UB
1973 /// types for the RT emission routines for 'for_static_init' and
1974 /// 'for_dispatch_init'
1975 static std::pair<llvm::Value *, llvm::Value *>
1976 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1977                                         const OMPExecutableDirective &S,
1978                                         Address LB, Address UB) {
1979   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1980   const Expr *IVExpr = LS.getIterationVariable();
1981   // when implementing a dynamic schedule for a 'for' combined with a
1982   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1983   // is not normalized as each team only executes its own assigned
1984   // distribute chunk
1985   QualType IteratorTy = IVExpr->getType();
1986   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1987                                             SourceLocation());
1988   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1989                                             SourceLocation());
1990   return {LBVal, UBVal};
1991 }
1992 
1993 static void emitDistributeParallelForDistributeInnerBoundParams(
1994     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1995     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1996   const auto &Dir = cast<OMPLoopDirective>(S);
1997   LValue LB =
1998       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1999   auto LBCast = CGF.Builder.CreateIntCast(
2000       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2001   CapturedVars.push_back(LBCast);
2002   LValue UB =
2003       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2004 
2005   auto UBCast = CGF.Builder.CreateIntCast(
2006       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2007   CapturedVars.push_back(UBCast);
2008 }
2009 
2010 static void
2011 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2012                                  const OMPLoopDirective &S,
2013                                  CodeGenFunction::JumpDest LoopExit) {
2014   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2015                                          PrePostActionTy &) {
2016     bool HasCancel = false;
2017     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2018       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2019         HasCancel = D->hasCancel();
2020       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2021         HasCancel = D->hasCancel();
2022       else if (const auto *D =
2023                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2024         HasCancel = D->hasCancel();
2025     }
2026     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2027                                                      HasCancel);
2028     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2029                                emitDistributeParallelForInnerBounds,
2030                                emitDistributeParallelForDispatchBounds);
2031   };
2032 
2033   emitCommonOMPParallelDirective(
2034       CGF, S,
2035       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2036       CGInlinedWorksharingLoop,
2037       emitDistributeParallelForDistributeInnerBoundParams);
2038 }
2039 
2040 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2041     const OMPDistributeParallelForDirective &S) {
2042   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2043     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2044                               S.getDistInc());
2045   };
2046   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2047   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2048 }
2049 
2050 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2051     const OMPDistributeParallelForSimdDirective &S) {
2052   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2053     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2054                               S.getDistInc());
2055   };
2056   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2057   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2058 }
2059 
2060 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2061     const OMPDistributeSimdDirective &S) {
2062   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2063     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2064   };
2065   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2066   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2067 }
2068 
2069 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2070     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2071   // Emit SPMD target parallel for region as a standalone region.
2072   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2073     emitOMPSimdRegion(CGF, S, Action);
2074   };
2075   llvm::Function *Fn;
2076   llvm::Constant *Addr;
2077   // Emit target region as a standalone region.
2078   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2079       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2080   assert(Fn && Addr && "Target device function emission failed.");
2081 }
2082 
2083 void CodeGenFunction::EmitOMPTargetSimdDirective(
2084     const OMPTargetSimdDirective &S) {
2085   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2086     emitOMPSimdRegion(CGF, S, Action);
2087   };
2088   emitCommonOMPTargetDirective(*this, S, CodeGen);
2089 }
2090 
2091 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2092     const OMPTeamsDistributeSimdDirective &S) {
2093   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2094   CGM.getOpenMPRuntime().emitInlinedDirective(
2095       *this, OMPD_teams_distribute_simd,
2096       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2097         OMPLoopScope PreInitScope(CGF, S);
2098         CGF.EmitStmt(
2099             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2100       });
2101 }
2102 
2103 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2104     const OMPTeamsDistributeParallelForSimdDirective &S) {
2105   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2106   CGM.getOpenMPRuntime().emitInlinedDirective(
2107       *this, OMPD_teams_distribute_parallel_for_simd,
2108       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2109         OMPLoopScope PreInitScope(CGF, S);
2110         CGF.EmitStmt(
2111             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2112       });
2113 }
2114 
2115 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2116     const OMPTargetTeamsDistributeDirective &S) {
2117   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2118   CGM.getOpenMPRuntime().emitInlinedDirective(
2119       *this, OMPD_target_teams_distribute,
2120       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2121         CGF.EmitStmt(
2122             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2123       });
2124 }
2125 
2126 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2127     const OMPTargetTeamsDistributeParallelForDirective &S) {
2128   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2129   CGM.getOpenMPRuntime().emitInlinedDirective(
2130       *this, OMPD_target_teams_distribute_parallel_for,
2131       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2132         CGF.EmitStmt(
2133             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2134       });
2135 }
2136 
2137 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2138     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2139   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2140   CGM.getOpenMPRuntime().emitInlinedDirective(
2141       *this, OMPD_target_teams_distribute_parallel_for_simd,
2142       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2143         CGF.EmitStmt(
2144             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2145       });
2146 }
2147 
2148 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2149     const OMPTargetTeamsDistributeSimdDirective &S) {
2150   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2151   CGM.getOpenMPRuntime().emitInlinedDirective(
2152       *this, OMPD_target_teams_distribute_simd,
2153       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2154         CGF.EmitStmt(
2155             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2156       });
2157 }
2158 
2159 namespace {
2160   struct ScheduleKindModifiersTy {
2161     OpenMPScheduleClauseKind Kind;
2162     OpenMPScheduleClauseModifier M1;
2163     OpenMPScheduleClauseModifier M2;
2164     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2165                             OpenMPScheduleClauseModifier M1,
2166                             OpenMPScheduleClauseModifier M2)
2167         : Kind(Kind), M1(M1), M2(M2) {}
2168   };
2169 } // namespace
2170 
2171 bool CodeGenFunction::EmitOMPWorksharingLoop(
2172     const OMPLoopDirective &S, Expr *EUB,
2173     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2174     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2175   // Emit the loop iteration variable.
2176   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2177   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2178   EmitVarDecl(*IVDecl);
2179 
2180   // Emit the iterations count variable.
2181   // If it is not a variable, Sema decided to calculate iterations count on each
2182   // iteration (e.g., it is foldable into a constant).
2183   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2184     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2185     // Emit calculation of the iterations count.
2186     EmitIgnoredExpr(S.getCalcLastIteration());
2187   }
2188 
2189   auto &RT = CGM.getOpenMPRuntime();
2190 
2191   bool HasLastprivateClause;
2192   // Check pre-condition.
2193   {
2194     OMPLoopScope PreInitScope(*this, S);
2195     // Skip the entire loop if we don't meet the precondition.
2196     // If the condition constant folds and can be elided, avoid emitting the
2197     // whole loop.
2198     bool CondConstant;
2199     llvm::BasicBlock *ContBlock = nullptr;
2200     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2201       if (!CondConstant)
2202         return false;
2203     } else {
2204       auto *ThenBlock = createBasicBlock("omp.precond.then");
2205       ContBlock = createBasicBlock("omp.precond.end");
2206       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2207                   getProfileCount(&S));
2208       EmitBlock(ThenBlock);
2209       incrementProfileCounter(&S);
2210     }
2211 
2212     bool Ordered = false;
2213     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2214       if (OrderedClause->getNumForLoops())
2215         RT.emitDoacrossInit(*this, S);
2216       else
2217         Ordered = true;
2218     }
2219 
2220     llvm::DenseSet<const Expr *> EmittedFinals;
2221     emitAlignedClause(*this, S);
2222     bool HasLinears = EmitOMPLinearClauseInit(S);
2223     // Emit helper vars inits.
2224 
2225     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2226     LValue LB = Bounds.first;
2227     LValue UB = Bounds.second;
2228     LValue ST =
2229         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2230     LValue IL =
2231         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2232 
2233     // Emit 'then' code.
2234     {
2235       OMPPrivateScope LoopScope(*this);
2236       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2237         // Emit implicit barrier to synchronize threads and avoid data races on
2238         // initialization of firstprivate variables and post-update of
2239         // lastprivate variables.
2240         CGM.getOpenMPRuntime().emitBarrierCall(
2241             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2242             /*ForceSimpleCall=*/true);
2243       }
2244       EmitOMPPrivateClause(S, LoopScope);
2245       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2246       EmitOMPReductionClauseInit(S, LoopScope);
2247       EmitOMPPrivateLoopCounters(S, LoopScope);
2248       EmitOMPLinearClause(S, LoopScope);
2249       (void)LoopScope.Privatize();
2250 
2251       // Detect the loop schedule kind and chunk.
2252       llvm::Value *Chunk = nullptr;
2253       OpenMPScheduleTy ScheduleKind;
2254       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2255         ScheduleKind.Schedule = C->getScheduleKind();
2256         ScheduleKind.M1 = C->getFirstScheduleModifier();
2257         ScheduleKind.M2 = C->getSecondScheduleModifier();
2258         if (const auto *Ch = C->getChunkSize()) {
2259           Chunk = EmitScalarExpr(Ch);
2260           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2261                                        S.getIterationVariable()->getType(),
2262                                        S.getLocStart());
2263         }
2264       }
2265       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2266       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2267       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2268       // If the static schedule kind is specified or if the ordered clause is
2269       // specified, and if no monotonic modifier is specified, the effect will
2270       // be as if the monotonic modifier was specified.
2271       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2272                                 /* Chunked */ Chunk != nullptr) &&
2273           !Ordered) {
2274         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2275           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2276         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2277         // When no chunk_size is specified, the iteration space is divided into
2278         // chunks that are approximately equal in size, and at most one chunk is
2279         // distributed to each thread. Note that the size of the chunks is
2280         // unspecified in this case.
2281         CGOpenMPRuntime::StaticRTInput StaticInit(
2282             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2283             UB.getAddress(), ST.getAddress());
2284         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2285                              ScheduleKind, StaticInit);
2286         auto LoopExit =
2287             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2288         // UB = min(UB, GlobalUB);
2289         EmitIgnoredExpr(S.getEnsureUpperBound());
2290         // IV = LB;
2291         EmitIgnoredExpr(S.getInit());
2292         // while (idx <= UB) { BODY; ++idx; }
2293         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2294                          S.getInc(),
2295                          [&S, LoopExit](CodeGenFunction &CGF) {
2296                            CGF.EmitOMPLoopBody(S, LoopExit);
2297                            CGF.EmitStopPoint(&S);
2298                          },
2299                          [](CodeGenFunction &) {});
2300         EmitBlock(LoopExit.getBlock());
2301         // Tell the runtime we are done.
2302         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2303           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2304                                                          S.getDirectiveKind());
2305         };
2306         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2307       } else {
2308         const bool IsMonotonic =
2309             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2310             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2311             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2312             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2313         // Emit the outer loop, which requests its work chunk [LB..UB] from
2314         // runtime and runs the inner loop to process it.
2315         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2316                                              ST.getAddress(), IL.getAddress(),
2317                                              Chunk, EUB);
2318         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2319                             LoopArguments, CGDispatchBounds);
2320       }
2321       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2322         EmitOMPSimdFinal(S,
2323                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2324                            return CGF.Builder.CreateIsNotNull(
2325                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2326                          });
2327       }
2328       EmitOMPReductionClauseFinal(
2329           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2330                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2331                  : /*Parallel only*/ OMPD_parallel);
2332       // Emit post-update of the reduction variables if IsLastIter != 0.
2333       emitPostUpdateForReductionClause(
2334           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2335             return CGF.Builder.CreateIsNotNull(
2336                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2337           });
2338       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2339       if (HasLastprivateClause)
2340         EmitOMPLastprivateClauseFinal(
2341             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2342             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2343     }
2344     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2345       return CGF.Builder.CreateIsNotNull(
2346           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2347     });
2348     // We're now done with the loop, so jump to the continuation block.
2349     if (ContBlock) {
2350       EmitBranch(ContBlock);
2351       EmitBlock(ContBlock, true);
2352     }
2353   }
2354   return HasLastprivateClause;
2355 }
2356 
2357 /// The following two functions generate expressions for the loop lower
2358 /// and upper bounds in case of static and dynamic (dispatch) schedule
2359 /// of the associated 'for' or 'distribute' loop.
2360 static std::pair<LValue, LValue>
2361 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2362   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2363   LValue LB =
2364       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2365   LValue UB =
2366       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2367   return {LB, UB};
2368 }
2369 
2370 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2371 /// consider the lower and upper bound expressions generated by the
2372 /// worksharing loop support, but we use 0 and the iteration space size as
2373 /// constants
2374 static std::pair<llvm::Value *, llvm::Value *>
2375 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2376                           Address LB, Address UB) {
2377   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2378   const Expr *IVExpr = LS.getIterationVariable();
2379   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2380   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2381   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2382   return {LBVal, UBVal};
2383 }
2384 
2385 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2386   bool HasLastprivates = false;
2387   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2388                                           PrePostActionTy &) {
2389     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2390     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2391                                                  emitForLoopBounds,
2392                                                  emitDispatchForLoopBounds);
2393   };
2394   {
2395     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2396     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2397                                                 S.hasCancel());
2398   }
2399 
2400   // Emit an implicit barrier at the end.
2401   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2402     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2403   }
2404 }
2405 
2406 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2407   bool HasLastprivates = false;
2408   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2409                                           PrePostActionTy &) {
2410     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2411                                                  emitForLoopBounds,
2412                                                  emitDispatchForLoopBounds);
2413   };
2414   {
2415     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2416     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2417   }
2418 
2419   // Emit an implicit barrier at the end.
2420   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2421     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2422   }
2423 }
2424 
2425 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2426                                 const Twine &Name,
2427                                 llvm::Value *Init = nullptr) {
2428   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2429   if (Init)
2430     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2431   return LVal;
2432 }
2433 
2434 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2435   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
2436   auto *CS = dyn_cast<CompoundStmt>(Stmt);
2437   bool HasLastprivates = false;
2438   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
2439                                                     PrePostActionTy &) {
2440     auto &C = CGF.CGM.getContext();
2441     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2442     // Emit helper vars inits.
2443     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2444                                   CGF.Builder.getInt32(0));
2445     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2446                                       : CGF.Builder.getInt32(0);
2447     LValue UB =
2448         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2449     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2450                                   CGF.Builder.getInt32(1));
2451     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2452                                   CGF.Builder.getInt32(0));
2453     // Loop counter.
2454     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2455     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2456     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2457     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2458     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2459     // Generate condition for loop.
2460     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2461                         OK_Ordinary, S.getLocStart(), FPOptions());
2462     // Increment for loop counter.
2463     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2464                       S.getLocStart());
2465     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2466       // Iterate through all sections and emit a switch construct:
2467       // switch (IV) {
2468       //   case 0:
2469       //     <SectionStmt[0]>;
2470       //     break;
2471       // ...
2472       //   case <NumSection> - 1:
2473       //     <SectionStmt[<NumSection> - 1]>;
2474       //     break;
2475       // }
2476       // .omp.sections.exit:
2477       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2478       auto *SwitchStmt = CGF.Builder.CreateSwitch(
2479           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2480           CS == nullptr ? 1 : CS->size());
2481       if (CS) {
2482         unsigned CaseNumber = 0;
2483         for (auto *SubStmt : CS->children()) {
2484           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2485           CGF.EmitBlock(CaseBB);
2486           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2487           CGF.EmitStmt(SubStmt);
2488           CGF.EmitBranch(ExitBB);
2489           ++CaseNumber;
2490         }
2491       } else {
2492         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2493         CGF.EmitBlock(CaseBB);
2494         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2495         CGF.EmitStmt(Stmt);
2496         CGF.EmitBranch(ExitBB);
2497       }
2498       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2499     };
2500 
2501     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2502     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2503       // Emit implicit barrier to synchronize threads and avoid data races on
2504       // initialization of firstprivate variables and post-update of lastprivate
2505       // variables.
2506       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2507           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2508           /*ForceSimpleCall=*/true);
2509     }
2510     CGF.EmitOMPPrivateClause(S, LoopScope);
2511     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2512     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2513     (void)LoopScope.Privatize();
2514 
2515     // Emit static non-chunked loop.
2516     OpenMPScheduleTy ScheduleKind;
2517     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2518     CGOpenMPRuntime::StaticRTInput StaticInit(
2519         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
2520         LB.getAddress(), UB.getAddress(), ST.getAddress());
2521     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2522         CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
2523     // UB = min(UB, GlobalUB);
2524     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2525     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2526         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2527     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2528     // IV = LB;
2529     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2530     // while (idx <= UB) { BODY; ++idx; }
2531     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2532                          [](CodeGenFunction &) {});
2533     // Tell the runtime we are done.
2534     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2535       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2536                                                      S.getDirectiveKind());
2537     };
2538     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2539     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2540     // Emit post-update of the reduction variables if IsLastIter != 0.
2541     emitPostUpdateForReductionClause(
2542         CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2543           return CGF.Builder.CreateIsNotNull(
2544               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2545         });
2546 
2547     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2548     if (HasLastprivates)
2549       CGF.EmitOMPLastprivateClauseFinal(
2550           S, /*NoFinals=*/false,
2551           CGF.Builder.CreateIsNotNull(
2552               CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2553   };
2554 
2555   bool HasCancel = false;
2556   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2557     HasCancel = OSD->hasCancel();
2558   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2559     HasCancel = OPSD->hasCancel();
2560   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2561   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2562                                               HasCancel);
2563   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2564   // clause. Otherwise the barrier will be generated by the codegen for the
2565   // directive.
2566   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2567     // Emit implicit barrier to synchronize threads and avoid data races on
2568     // initialization of firstprivate variables.
2569     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2570                                            OMPD_unknown);
2571   }
2572 }
2573 
2574 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2575   {
2576     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2577     EmitSections(S);
2578   }
2579   // Emit an implicit barrier at the end.
2580   if (!S.getSingleClause<OMPNowaitClause>()) {
2581     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2582                                            OMPD_sections);
2583   }
2584 }
2585 
2586 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2587   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2588     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2589   };
2590   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2591   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2592                                               S.hasCancel());
2593 }
2594 
2595 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2596   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2597   llvm::SmallVector<const Expr *, 8> DestExprs;
2598   llvm::SmallVector<const Expr *, 8> SrcExprs;
2599   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2600   // Check if there are any 'copyprivate' clauses associated with this
2601   // 'single' construct.
2602   // Build a list of copyprivate variables along with helper expressions
2603   // (<source>, <destination>, <destination>=<source> expressions)
2604   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2605     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2606     DestExprs.append(C->destination_exprs().begin(),
2607                      C->destination_exprs().end());
2608     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2609     AssignmentOps.append(C->assignment_ops().begin(),
2610                          C->assignment_ops().end());
2611   }
2612   // Emit code for 'single' region along with 'copyprivate' clauses
2613   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2614     Action.Enter(CGF);
2615     OMPPrivateScope SingleScope(CGF);
2616     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2617     CGF.EmitOMPPrivateClause(S, SingleScope);
2618     (void)SingleScope.Privatize();
2619     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2620   };
2621   {
2622     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2623     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2624                                             CopyprivateVars, DestExprs,
2625                                             SrcExprs, AssignmentOps);
2626   }
2627   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2628   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2629   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2630     CGM.getOpenMPRuntime().emitBarrierCall(
2631         *this, S.getLocStart(),
2632         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2633   }
2634 }
2635 
2636 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2637   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2638     Action.Enter(CGF);
2639     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2640   };
2641   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2642   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2643 }
2644 
2645 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2646   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2647     Action.Enter(CGF);
2648     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2649   };
2650   Expr *Hint = nullptr;
2651   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2652     Hint = HintClause->getHint();
2653   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2654   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2655                                             S.getDirectiveName().getAsString(),
2656                                             CodeGen, S.getLocStart(), Hint);
2657 }
2658 
2659 void CodeGenFunction::EmitOMPParallelForDirective(
2660     const OMPParallelForDirective &S) {
2661   // Emit directive as a combined directive that consists of two implicit
2662   // directives: 'parallel' with 'for' directive.
2663   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2664     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2665     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2666                                emitDispatchForLoopBounds);
2667   };
2668   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2669                                  emitEmptyBoundParameters);
2670 }
2671 
2672 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2673     const OMPParallelForSimdDirective &S) {
2674   // Emit directive as a combined directive that consists of two implicit
2675   // directives: 'parallel' with 'for' directive.
2676   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2677     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2678                                emitDispatchForLoopBounds);
2679   };
2680   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2681                                  emitEmptyBoundParameters);
2682 }
2683 
2684 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2685     const OMPParallelSectionsDirective &S) {
2686   // Emit directive as a combined directive that consists of two implicit
2687   // directives: 'parallel' with 'sections' directive.
2688   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2689     CGF.EmitSections(S);
2690   };
2691   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2692                                  emitEmptyBoundParameters);
2693 }
2694 
2695 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2696                                                 const RegionCodeGenTy &BodyGen,
2697                                                 const TaskGenTy &TaskGen,
2698                                                 OMPTaskDataTy &Data) {
2699   // Emit outlined function for task construct.
2700   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2701   auto *I = CS->getCapturedDecl()->param_begin();
2702   auto *PartId = std::next(I);
2703   auto *TaskT = std::next(I, 4);
2704   // Check if the task is final
2705   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2706     // If the condition constant folds and can be elided, try to avoid emitting
2707     // the condition and the dead arm of the if/else.
2708     auto *Cond = Clause->getCondition();
2709     bool CondConstant;
2710     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2711       Data.Final.setInt(CondConstant);
2712     else
2713       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2714   } else {
2715     // By default the task is not final.
2716     Data.Final.setInt(/*IntVal=*/false);
2717   }
2718   // Check if the task has 'priority' clause.
2719   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2720     auto *Prio = Clause->getPriority();
2721     Data.Priority.setInt(/*IntVal=*/true);
2722     Data.Priority.setPointer(EmitScalarConversion(
2723         EmitScalarExpr(Prio), Prio->getType(),
2724         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2725         Prio->getExprLoc()));
2726   }
2727   // The first function argument for tasks is a thread id, the second one is a
2728   // part id (0 for tied tasks, >=0 for untied task).
2729   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2730   // Get list of private variables.
2731   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2732     auto IRef = C->varlist_begin();
2733     for (auto *IInit : C->private_copies()) {
2734       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2735       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2736         Data.PrivateVars.push_back(*IRef);
2737         Data.PrivateCopies.push_back(IInit);
2738       }
2739       ++IRef;
2740     }
2741   }
2742   EmittedAsPrivate.clear();
2743   // Get list of firstprivate variables.
2744   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2745     auto IRef = C->varlist_begin();
2746     auto IElemInitRef = C->inits().begin();
2747     for (auto *IInit : C->private_copies()) {
2748       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2749       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2750         Data.FirstprivateVars.push_back(*IRef);
2751         Data.FirstprivateCopies.push_back(IInit);
2752         Data.FirstprivateInits.push_back(*IElemInitRef);
2753       }
2754       ++IRef;
2755       ++IElemInitRef;
2756     }
2757   }
2758   // Get list of lastprivate variables (for taskloops).
2759   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2760   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2761     auto IRef = C->varlist_begin();
2762     auto ID = C->destination_exprs().begin();
2763     for (auto *IInit : C->private_copies()) {
2764       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2765       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2766         Data.LastprivateVars.push_back(*IRef);
2767         Data.LastprivateCopies.push_back(IInit);
2768       }
2769       LastprivateDstsOrigs.insert(
2770           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2771            cast<DeclRefExpr>(*IRef)});
2772       ++IRef;
2773       ++ID;
2774     }
2775   }
2776   SmallVector<const Expr *, 4> LHSs;
2777   SmallVector<const Expr *, 4> RHSs;
2778   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2779     auto IPriv = C->privates().begin();
2780     auto IRed = C->reduction_ops().begin();
2781     auto ILHS = C->lhs_exprs().begin();
2782     auto IRHS = C->rhs_exprs().begin();
2783     for (const auto *Ref : C->varlists()) {
2784       Data.ReductionVars.emplace_back(Ref);
2785       Data.ReductionCopies.emplace_back(*IPriv);
2786       Data.ReductionOps.emplace_back(*IRed);
2787       LHSs.emplace_back(*ILHS);
2788       RHSs.emplace_back(*IRHS);
2789       std::advance(IPriv, 1);
2790       std::advance(IRed, 1);
2791       std::advance(ILHS, 1);
2792       std::advance(IRHS, 1);
2793     }
2794   }
2795   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2796       *this, S.getLocStart(), LHSs, RHSs, Data);
2797   // Build list of dependences.
2798   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2799     for (auto *IRef : C->varlists())
2800       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2801   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2802       CodeGenFunction &CGF, PrePostActionTy &Action) {
2803     // Set proper addresses for generated private copies.
2804     OMPPrivateScope Scope(CGF);
2805     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2806         !Data.LastprivateVars.empty()) {
2807       enum { PrivatesParam = 2, CopyFnParam = 3 };
2808       auto *CopyFn = CGF.Builder.CreateLoad(
2809           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2810       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2811           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2812       // Map privates.
2813       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2814       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2815       CallArgs.push_back(PrivatesPtr);
2816       for (auto *E : Data.PrivateVars) {
2817         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2818         Address PrivatePtr = CGF.CreateMemTemp(
2819             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2820         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2821         CallArgs.push_back(PrivatePtr.getPointer());
2822       }
2823       for (auto *E : Data.FirstprivateVars) {
2824         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2825         Address PrivatePtr =
2826             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2827                               ".firstpriv.ptr.addr");
2828         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2829         CallArgs.push_back(PrivatePtr.getPointer());
2830       }
2831       for (auto *E : Data.LastprivateVars) {
2832         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2833         Address PrivatePtr =
2834             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2835                               ".lastpriv.ptr.addr");
2836         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2837         CallArgs.push_back(PrivatePtr.getPointer());
2838       }
2839       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2840                                                           CopyFn, CallArgs);
2841       for (auto &&Pair : LastprivateDstsOrigs) {
2842         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2843         DeclRefExpr DRE(
2844             const_cast<VarDecl *>(OrigVD),
2845             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2846                 OrigVD) != nullptr,
2847             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2848         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2849           return CGF.EmitLValue(&DRE).getAddress();
2850         });
2851       }
2852       for (auto &&Pair : PrivatePtrs) {
2853         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2854                             CGF.getContext().getDeclAlign(Pair.first));
2855         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2856       }
2857     }
2858     if (Data.Reductions) {
2859       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2860       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2861                              Data.ReductionOps);
2862       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2863           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2864       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2865         RedCG.emitSharedLValue(CGF, Cnt);
2866         RedCG.emitAggregateType(CGF, Cnt);
2867         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2868             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2869         Replacement =
2870             Address(CGF.EmitScalarConversion(
2871                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2872                         CGF.getContext().getPointerType(
2873                             Data.ReductionCopies[Cnt]->getType()),
2874                         SourceLocation()),
2875                     Replacement.getAlignment());
2876         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2877         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2878                          [Replacement]() { return Replacement; });
2879         // FIXME: This must removed once the runtime library is fixed.
2880         // Emit required threadprivate variables for
2881         // initilizer/combiner/finalizer.
2882         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2883                                                            RedCG, Cnt);
2884       }
2885     }
2886     // Privatize all private variables except for in_reduction items.
2887     (void)Scope.Privatize();
2888     SmallVector<const Expr *, 4> InRedVars;
2889     SmallVector<const Expr *, 4> InRedPrivs;
2890     SmallVector<const Expr *, 4> InRedOps;
2891     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2892     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2893       auto IPriv = C->privates().begin();
2894       auto IRed = C->reduction_ops().begin();
2895       auto ITD = C->taskgroup_descriptors().begin();
2896       for (const auto *Ref : C->varlists()) {
2897         InRedVars.emplace_back(Ref);
2898         InRedPrivs.emplace_back(*IPriv);
2899         InRedOps.emplace_back(*IRed);
2900         TaskgroupDescriptors.emplace_back(*ITD);
2901         std::advance(IPriv, 1);
2902         std::advance(IRed, 1);
2903         std::advance(ITD, 1);
2904       }
2905     }
2906     // Privatize in_reduction items here, because taskgroup descriptors must be
2907     // privatized earlier.
2908     OMPPrivateScope InRedScope(CGF);
2909     if (!InRedVars.empty()) {
2910       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2911       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2912         RedCG.emitSharedLValue(CGF, Cnt);
2913         RedCG.emitAggregateType(CGF, Cnt);
2914         // The taskgroup descriptor variable is always implicit firstprivate and
2915         // privatized already during procoessing of the firstprivates.
2916         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2917             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2918         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2919             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2920         Replacement = Address(
2921             CGF.EmitScalarConversion(
2922                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2923                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2924                 SourceLocation()),
2925             Replacement.getAlignment());
2926         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2927         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2928                               [Replacement]() { return Replacement; });
2929         // FIXME: This must removed once the runtime library is fixed.
2930         // Emit required threadprivate variables for
2931         // initilizer/combiner/finalizer.
2932         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2933                                                            RedCG, Cnt);
2934       }
2935     }
2936     (void)InRedScope.Privatize();
2937 
2938     Action.Enter(CGF);
2939     BodyGen(CGF);
2940   };
2941   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2942       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2943       Data.NumberOfParts);
2944   OMPLexicalScope Scope(*this, S);
2945   TaskGen(*this, OutlinedFn, Data);
2946 }
2947 
2948 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2949   // Emit outlined function for task construct.
2950   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2951   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2952   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2953   const Expr *IfCond = nullptr;
2954   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2955     if (C->getNameModifier() == OMPD_unknown ||
2956         C->getNameModifier() == OMPD_task) {
2957       IfCond = C->getCondition();
2958       break;
2959     }
2960   }
2961 
2962   OMPTaskDataTy Data;
2963   // Check if we should emit tied or untied task.
2964   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2965   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2966     CGF.EmitStmt(CS->getCapturedStmt());
2967   };
2968   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2969                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2970                             const OMPTaskDataTy &Data) {
2971     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2972                                             SharedsTy, CapturedStruct, IfCond,
2973                                             Data);
2974   };
2975   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2976 }
2977 
2978 void CodeGenFunction::EmitOMPTaskyieldDirective(
2979     const OMPTaskyieldDirective &S) {
2980   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2981 }
2982 
2983 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2984   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2985 }
2986 
2987 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2988   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2989 }
2990 
2991 void CodeGenFunction::EmitOMPTaskgroupDirective(
2992     const OMPTaskgroupDirective &S) {
2993   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2994     Action.Enter(CGF);
2995     if (const Expr *E = S.getReductionRef()) {
2996       SmallVector<const Expr *, 4> LHSs;
2997       SmallVector<const Expr *, 4> RHSs;
2998       OMPTaskDataTy Data;
2999       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
3000         auto IPriv = C->privates().begin();
3001         auto IRed = C->reduction_ops().begin();
3002         auto ILHS = C->lhs_exprs().begin();
3003         auto IRHS = C->rhs_exprs().begin();
3004         for (const auto *Ref : C->varlists()) {
3005           Data.ReductionVars.emplace_back(Ref);
3006           Data.ReductionCopies.emplace_back(*IPriv);
3007           Data.ReductionOps.emplace_back(*IRed);
3008           LHSs.emplace_back(*ILHS);
3009           RHSs.emplace_back(*IRHS);
3010           std::advance(IPriv, 1);
3011           std::advance(IRed, 1);
3012           std::advance(ILHS, 1);
3013           std::advance(IRHS, 1);
3014         }
3015       }
3016       llvm::Value *ReductionDesc =
3017           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
3018                                                            LHSs, RHSs, Data);
3019       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3020       CGF.EmitVarDecl(*VD);
3021       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
3022                             /*Volatile=*/false, E->getType());
3023     }
3024     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3025   };
3026   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3027   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3028 }
3029 
3030 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3031   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3032     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3033       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3034                                 FlushClause->varlist_end());
3035     }
3036     return llvm::None;
3037   }(), S.getLocStart());
3038 }
3039 
3040 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3041                                             const CodeGenLoopTy &CodeGenLoop,
3042                                             Expr *IncExpr) {
3043   // Emit the loop iteration variable.
3044   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3045   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3046   EmitVarDecl(*IVDecl);
3047 
3048   // Emit the iterations count variable.
3049   // If it is not a variable, Sema decided to calculate iterations count on each
3050   // iteration (e.g., it is foldable into a constant).
3051   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3052     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3053     // Emit calculation of the iterations count.
3054     EmitIgnoredExpr(S.getCalcLastIteration());
3055   }
3056 
3057   auto &RT = CGM.getOpenMPRuntime();
3058 
3059   bool HasLastprivateClause = false;
3060   // Check pre-condition.
3061   {
3062     OMPLoopScope PreInitScope(*this, S);
3063     // Skip the entire loop if we don't meet the precondition.
3064     // If the condition constant folds and can be elided, avoid emitting the
3065     // whole loop.
3066     bool CondConstant;
3067     llvm::BasicBlock *ContBlock = nullptr;
3068     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3069       if (!CondConstant)
3070         return;
3071     } else {
3072       auto *ThenBlock = createBasicBlock("omp.precond.then");
3073       ContBlock = createBasicBlock("omp.precond.end");
3074       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3075                   getProfileCount(&S));
3076       EmitBlock(ThenBlock);
3077       incrementProfileCounter(&S);
3078     }
3079 
3080     emitAlignedClause(*this, S);
3081     // Emit 'then' code.
3082     {
3083       // Emit helper vars inits.
3084 
3085       LValue LB = EmitOMPHelperVar(
3086           *this, cast<DeclRefExpr>(
3087                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3088                           ? S.getCombinedLowerBoundVariable()
3089                           : S.getLowerBoundVariable())));
3090       LValue UB = EmitOMPHelperVar(
3091           *this, cast<DeclRefExpr>(
3092                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3093                           ? S.getCombinedUpperBoundVariable()
3094                           : S.getUpperBoundVariable())));
3095       LValue ST =
3096           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3097       LValue IL =
3098           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3099 
3100       OMPPrivateScope LoopScope(*this);
3101       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3102         // Emit implicit barrier to synchronize threads and avoid data races
3103         // on initialization of firstprivate variables and post-update of
3104         // lastprivate variables.
3105         CGM.getOpenMPRuntime().emitBarrierCall(
3106             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3107             /*ForceSimpleCall=*/true);
3108       }
3109       EmitOMPPrivateClause(S, LoopScope);
3110       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3111           !isOpenMPParallelDirective(S.getDirectiveKind()))
3112         EmitOMPReductionClauseInit(S, LoopScope);
3113       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3114       EmitOMPPrivateLoopCounters(S, LoopScope);
3115       (void)LoopScope.Privatize();
3116 
3117       // Detect the distribute schedule kind and chunk.
3118       llvm::Value *Chunk = nullptr;
3119       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3120       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3121         ScheduleKind = C->getDistScheduleKind();
3122         if (const auto *Ch = C->getChunkSize()) {
3123           Chunk = EmitScalarExpr(Ch);
3124           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3125                                        S.getIterationVariable()->getType(),
3126                                        S.getLocStart());
3127         }
3128       }
3129       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3130       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3131 
3132       // OpenMP [2.10.8, distribute Construct, Description]
3133       // If dist_schedule is specified, kind must be static. If specified,
3134       // iterations are divided into chunks of size chunk_size, chunks are
3135       // assigned to the teams of the league in a round-robin fashion in the
3136       // order of the team number. When no chunk_size is specified, the
3137       // iteration space is divided into chunks that are approximately equal
3138       // in size, and at most one chunk is distributed to each team of the
3139       // league. The size of the chunks is unspecified in this case.
3140       if (RT.isStaticNonchunked(ScheduleKind,
3141                                 /* Chunked */ Chunk != nullptr)) {
3142         if (isOpenMPSimdDirective(S.getDirectiveKind()))
3143           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
3144         CGOpenMPRuntime::StaticRTInput StaticInit(
3145             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3146             LB.getAddress(), UB.getAddress(), ST.getAddress());
3147         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3148                                     StaticInit);
3149         auto LoopExit =
3150             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3151         // UB = min(UB, GlobalUB);
3152         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3153                             ? S.getCombinedEnsureUpperBound()
3154                             : S.getEnsureUpperBound());
3155         // IV = LB;
3156         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3157                             ? S.getCombinedInit()
3158                             : S.getInit());
3159 
3160         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3161                          ? S.getCombinedCond()
3162                          : S.getCond();
3163 
3164         // for distribute alone,  codegen
3165         // while (idx <= UB) { BODY; ++idx; }
3166         // when combined with 'for' (e.g. as in 'distribute parallel for')
3167         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3168         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3169                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3170                            CodeGenLoop(CGF, S, LoopExit);
3171                          },
3172                          [](CodeGenFunction &) {});
3173         EmitBlock(LoopExit.getBlock());
3174         // Tell the runtime we are done.
3175         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3176       } else {
3177         // Emit the outer loop, which requests its work chunk [LB..UB] from
3178         // runtime and runs the inner loop to process it.
3179         const OMPLoopArguments LoopArguments = {
3180             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3181             Chunk};
3182         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3183                                    CodeGenLoop);
3184       }
3185       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3186         EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3187           return CGF.Builder.CreateIsNotNull(
3188               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3189         });
3190       }
3191       OpenMPDirectiveKind ReductionKind = OMPD_unknown;
3192       if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
3193           isOpenMPSimdDirective(S.getDirectiveKind())) {
3194         ReductionKind = OMPD_parallel_for_simd;
3195       } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3196         ReductionKind = OMPD_parallel_for;
3197       } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3198         ReductionKind = OMPD_simd;
3199       } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
3200                  S.hasClausesOfKind<OMPReductionClause>()) {
3201         llvm_unreachable(
3202             "No reduction clauses is allowed in distribute directive.");
3203       }
3204       EmitOMPReductionClauseFinal(S, ReductionKind);
3205       // Emit post-update of the reduction variables if IsLastIter != 0.
3206       emitPostUpdateForReductionClause(
3207           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3208             return CGF.Builder.CreateIsNotNull(
3209                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3210           });
3211       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3212       if (HasLastprivateClause) {
3213         EmitOMPLastprivateClauseFinal(
3214             S, /*NoFinals=*/false,
3215             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
3216       }
3217     }
3218 
3219     // We're now done with the loop, so jump to the continuation block.
3220     if (ContBlock) {
3221       EmitBranch(ContBlock);
3222       EmitBlock(ContBlock, true);
3223     }
3224   }
3225 }
3226 
3227 void CodeGenFunction::EmitOMPDistributeDirective(
3228     const OMPDistributeDirective &S) {
3229   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3230 
3231     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3232   };
3233   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3234   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3235 }
3236 
3237 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3238                                                    const CapturedStmt *S) {
3239   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3240   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3241   CGF.CapturedStmtInfo = &CapStmtInfo;
3242   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3243   Fn->addFnAttr(llvm::Attribute::NoInline);
3244   return Fn;
3245 }
3246 
3247 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3248   if (!S.getAssociatedStmt()) {
3249     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3250       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3251     return;
3252   }
3253   auto *C = S.getSingleClause<OMPSIMDClause>();
3254   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3255                                  PrePostActionTy &Action) {
3256     if (C) {
3257       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3258       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3259       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3260       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3261       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3262                                                       OutlinedFn, CapturedVars);
3263     } else {
3264       Action.Enter(CGF);
3265       CGF.EmitStmt(
3266           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3267     }
3268   };
3269   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3270   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3271 }
3272 
3273 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3274                                          QualType SrcType, QualType DestType,
3275                                          SourceLocation Loc) {
3276   assert(CGF.hasScalarEvaluationKind(DestType) &&
3277          "DestType must have scalar evaluation kind.");
3278   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3279   return Val.isScalar()
3280              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3281                                         Loc)
3282              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3283                                                  DestType, Loc);
3284 }
3285 
3286 static CodeGenFunction::ComplexPairTy
3287 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3288                       QualType DestType, SourceLocation Loc) {
3289   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3290          "DestType must have complex evaluation kind.");
3291   CodeGenFunction::ComplexPairTy ComplexVal;
3292   if (Val.isScalar()) {
3293     // Convert the input element to the element type of the complex.
3294     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3295     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3296                                               DestElementType, Loc);
3297     ComplexVal = CodeGenFunction::ComplexPairTy(
3298         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3299   } else {
3300     assert(Val.isComplex() && "Must be a scalar or complex.");
3301     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3302     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3303     ComplexVal.first = CGF.EmitScalarConversion(
3304         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3305     ComplexVal.second = CGF.EmitScalarConversion(
3306         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3307   }
3308   return ComplexVal;
3309 }
3310 
3311 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3312                                   LValue LVal, RValue RVal) {
3313   if (LVal.isGlobalReg()) {
3314     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3315   } else {
3316     CGF.EmitAtomicStore(RVal, LVal,
3317                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3318                                  : llvm::AtomicOrdering::Monotonic,
3319                         LVal.isVolatile(), /*IsInit=*/false);
3320   }
3321 }
3322 
3323 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3324                                          QualType RValTy, SourceLocation Loc) {
3325   switch (getEvaluationKind(LVal.getType())) {
3326   case TEK_Scalar:
3327     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3328                                *this, RVal, RValTy, LVal.getType(), Loc)),
3329                            LVal);
3330     break;
3331   case TEK_Complex:
3332     EmitStoreOfComplex(
3333         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3334         /*isInit=*/false);
3335     break;
3336   case TEK_Aggregate:
3337     llvm_unreachable("Must be a scalar or complex.");
3338   }
3339 }
3340 
3341 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3342                                   const Expr *X, const Expr *V,
3343                                   SourceLocation Loc) {
3344   // v = x;
3345   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3346   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3347   LValue XLValue = CGF.EmitLValue(X);
3348   LValue VLValue = CGF.EmitLValue(V);
3349   RValue Res = XLValue.isGlobalReg()
3350                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3351                    : CGF.EmitAtomicLoad(
3352                          XLValue, Loc,
3353                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3354                                   : llvm::AtomicOrdering::Monotonic,
3355                          XLValue.isVolatile());
3356   // OpenMP, 2.12.6, atomic Construct
3357   // Any atomic construct with a seq_cst clause forces the atomically
3358   // performed operation to include an implicit flush operation without a
3359   // list.
3360   if (IsSeqCst)
3361     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3362   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3363 }
3364 
3365 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3366                                    const Expr *X, const Expr *E,
3367                                    SourceLocation Loc) {
3368   // x = expr;
3369   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3370   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3371   // OpenMP, 2.12.6, atomic Construct
3372   // Any atomic construct with a seq_cst clause forces the atomically
3373   // performed operation to include an implicit flush operation without a
3374   // list.
3375   if (IsSeqCst)
3376     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3377 }
3378 
3379 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3380                                                 RValue Update,
3381                                                 BinaryOperatorKind BO,
3382                                                 llvm::AtomicOrdering AO,
3383                                                 bool IsXLHSInRHSPart) {
3384   auto &Context = CGF.CGM.getContext();
3385   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3386   // expression is simple and atomic is allowed for the given type for the
3387   // target platform.
3388   if (BO == BO_Comma || !Update.isScalar() ||
3389       !Update.getScalarVal()->getType()->isIntegerTy() ||
3390       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3391                         (Update.getScalarVal()->getType() !=
3392                          X.getAddress().getElementType())) ||
3393       !X.getAddress().getElementType()->isIntegerTy() ||
3394       !Context.getTargetInfo().hasBuiltinAtomic(
3395           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3396     return std::make_pair(false, RValue::get(nullptr));
3397 
3398   llvm::AtomicRMWInst::BinOp RMWOp;
3399   switch (BO) {
3400   case BO_Add:
3401     RMWOp = llvm::AtomicRMWInst::Add;
3402     break;
3403   case BO_Sub:
3404     if (!IsXLHSInRHSPart)
3405       return std::make_pair(false, RValue::get(nullptr));
3406     RMWOp = llvm::AtomicRMWInst::Sub;
3407     break;
3408   case BO_And:
3409     RMWOp = llvm::AtomicRMWInst::And;
3410     break;
3411   case BO_Or:
3412     RMWOp = llvm::AtomicRMWInst::Or;
3413     break;
3414   case BO_Xor:
3415     RMWOp = llvm::AtomicRMWInst::Xor;
3416     break;
3417   case BO_LT:
3418     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3419                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3420                                    : llvm::AtomicRMWInst::Max)
3421                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3422                                    : llvm::AtomicRMWInst::UMax);
3423     break;
3424   case BO_GT:
3425     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3426                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3427                                    : llvm::AtomicRMWInst::Min)
3428                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3429                                    : llvm::AtomicRMWInst::UMin);
3430     break;
3431   case BO_Assign:
3432     RMWOp = llvm::AtomicRMWInst::Xchg;
3433     break;
3434   case BO_Mul:
3435   case BO_Div:
3436   case BO_Rem:
3437   case BO_Shl:
3438   case BO_Shr:
3439   case BO_LAnd:
3440   case BO_LOr:
3441     return std::make_pair(false, RValue::get(nullptr));
3442   case BO_PtrMemD:
3443   case BO_PtrMemI:
3444   case BO_LE:
3445   case BO_GE:
3446   case BO_EQ:
3447   case BO_NE:
3448   case BO_AddAssign:
3449   case BO_SubAssign:
3450   case BO_AndAssign:
3451   case BO_OrAssign:
3452   case BO_XorAssign:
3453   case BO_MulAssign:
3454   case BO_DivAssign:
3455   case BO_RemAssign:
3456   case BO_ShlAssign:
3457   case BO_ShrAssign:
3458   case BO_Comma:
3459     llvm_unreachable("Unsupported atomic update operation");
3460   }
3461   auto *UpdateVal = Update.getScalarVal();
3462   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3463     UpdateVal = CGF.Builder.CreateIntCast(
3464         IC, X.getAddress().getElementType(),
3465         X.getType()->hasSignedIntegerRepresentation());
3466   }
3467   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3468   return std::make_pair(true, RValue::get(Res));
3469 }
3470 
3471 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3472     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3473     llvm::AtomicOrdering AO, SourceLocation Loc,
3474     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3475   // Update expressions are allowed to have the following forms:
3476   // x binop= expr; -> xrval + expr;
3477   // x++, ++x -> xrval + 1;
3478   // x--, --x -> xrval - 1;
3479   // x = x binop expr; -> xrval binop expr
3480   // x = expr Op x; - > expr binop xrval;
3481   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3482   if (!Res.first) {
3483     if (X.isGlobalReg()) {
3484       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3485       // 'xrval'.
3486       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3487     } else {
3488       // Perform compare-and-swap procedure.
3489       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3490     }
3491   }
3492   return Res;
3493 }
3494 
3495 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3496                                     const Expr *X, const Expr *E,
3497                                     const Expr *UE, bool IsXLHSInRHSPart,
3498                                     SourceLocation Loc) {
3499   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3500          "Update expr in 'atomic update' must be a binary operator.");
3501   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3502   // Update expressions are allowed to have the following forms:
3503   // x binop= expr; -> xrval + expr;
3504   // x++, ++x -> xrval + 1;
3505   // x--, --x -> xrval - 1;
3506   // x = x binop expr; -> xrval binop expr
3507   // x = expr Op x; - > expr binop xrval;
3508   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3509   LValue XLValue = CGF.EmitLValue(X);
3510   RValue ExprRValue = CGF.EmitAnyExpr(E);
3511   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3512                      : llvm::AtomicOrdering::Monotonic;
3513   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3514   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3515   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3516   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3517   auto Gen =
3518       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3519         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3520         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3521         return CGF.EmitAnyExpr(UE);
3522       };
3523   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3524       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3525   // OpenMP, 2.12.6, atomic Construct
3526   // Any atomic construct with a seq_cst clause forces the atomically
3527   // performed operation to include an implicit flush operation without a
3528   // list.
3529   if (IsSeqCst)
3530     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3531 }
3532 
3533 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3534                             QualType SourceType, QualType ResType,
3535                             SourceLocation Loc) {
3536   switch (CGF.getEvaluationKind(ResType)) {
3537   case TEK_Scalar:
3538     return RValue::get(
3539         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3540   case TEK_Complex: {
3541     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3542     return RValue::getComplex(Res.first, Res.second);
3543   }
3544   case TEK_Aggregate:
3545     break;
3546   }
3547   llvm_unreachable("Must be a scalar or complex.");
3548 }
3549 
3550 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3551                                      bool IsPostfixUpdate, const Expr *V,
3552                                      const Expr *X, const Expr *E,
3553                                      const Expr *UE, bool IsXLHSInRHSPart,
3554                                      SourceLocation Loc) {
3555   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3556   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3557   RValue NewVVal;
3558   LValue VLValue = CGF.EmitLValue(V);
3559   LValue XLValue = CGF.EmitLValue(X);
3560   RValue ExprRValue = CGF.EmitAnyExpr(E);
3561   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3562                      : llvm::AtomicOrdering::Monotonic;
3563   QualType NewVValType;
3564   if (UE) {
3565     // 'x' is updated with some additional value.
3566     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3567            "Update expr in 'atomic capture' must be a binary operator.");
3568     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3569     // Update expressions are allowed to have the following forms:
3570     // x binop= expr; -> xrval + expr;
3571     // x++, ++x -> xrval + 1;
3572     // x--, --x -> xrval - 1;
3573     // x = x binop expr; -> xrval binop expr
3574     // x = expr Op x; - > expr binop xrval;
3575     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3576     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3577     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3578     NewVValType = XRValExpr->getType();
3579     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3580     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3581                   IsPostfixUpdate](RValue XRValue) -> RValue {
3582       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3583       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3584       RValue Res = CGF.EmitAnyExpr(UE);
3585       NewVVal = IsPostfixUpdate ? XRValue : Res;
3586       return Res;
3587     };
3588     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3589         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3590     if (Res.first) {
3591       // 'atomicrmw' instruction was generated.
3592       if (IsPostfixUpdate) {
3593         // Use old value from 'atomicrmw'.
3594         NewVVal = Res.second;
3595       } else {
3596         // 'atomicrmw' does not provide new value, so evaluate it using old
3597         // value of 'x'.
3598         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3599         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3600         NewVVal = CGF.EmitAnyExpr(UE);
3601       }
3602     }
3603   } else {
3604     // 'x' is simply rewritten with some 'expr'.
3605     NewVValType = X->getType().getNonReferenceType();
3606     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3607                                X->getType().getNonReferenceType(), Loc);
3608     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3609       NewVVal = XRValue;
3610       return ExprRValue;
3611     };
3612     // Try to perform atomicrmw xchg, otherwise simple exchange.
3613     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3614         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3615         Loc, Gen);
3616     if (Res.first) {
3617       // 'atomicrmw' instruction was generated.
3618       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3619     }
3620   }
3621   // Emit post-update store to 'v' of old/new 'x' value.
3622   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3623   // OpenMP, 2.12.6, atomic Construct
3624   // Any atomic construct with a seq_cst clause forces the atomically
3625   // performed operation to include an implicit flush operation without a
3626   // list.
3627   if (IsSeqCst)
3628     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3629 }
3630 
3631 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3632                               bool IsSeqCst, bool IsPostfixUpdate,
3633                               const Expr *X, const Expr *V, const Expr *E,
3634                               const Expr *UE, bool IsXLHSInRHSPart,
3635                               SourceLocation Loc) {
3636   switch (Kind) {
3637   case OMPC_read:
3638     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3639     break;
3640   case OMPC_write:
3641     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3642     break;
3643   case OMPC_unknown:
3644   case OMPC_update:
3645     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3646     break;
3647   case OMPC_capture:
3648     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3649                              IsXLHSInRHSPart, Loc);
3650     break;
3651   case OMPC_if:
3652   case OMPC_final:
3653   case OMPC_num_threads:
3654   case OMPC_private:
3655   case OMPC_firstprivate:
3656   case OMPC_lastprivate:
3657   case OMPC_reduction:
3658   case OMPC_task_reduction:
3659   case OMPC_in_reduction:
3660   case OMPC_safelen:
3661   case OMPC_simdlen:
3662   case OMPC_collapse:
3663   case OMPC_default:
3664   case OMPC_seq_cst:
3665   case OMPC_shared:
3666   case OMPC_linear:
3667   case OMPC_aligned:
3668   case OMPC_copyin:
3669   case OMPC_copyprivate:
3670   case OMPC_flush:
3671   case OMPC_proc_bind:
3672   case OMPC_schedule:
3673   case OMPC_ordered:
3674   case OMPC_nowait:
3675   case OMPC_untied:
3676   case OMPC_threadprivate:
3677   case OMPC_depend:
3678   case OMPC_mergeable:
3679   case OMPC_device:
3680   case OMPC_threads:
3681   case OMPC_simd:
3682   case OMPC_map:
3683   case OMPC_num_teams:
3684   case OMPC_thread_limit:
3685   case OMPC_priority:
3686   case OMPC_grainsize:
3687   case OMPC_nogroup:
3688   case OMPC_num_tasks:
3689   case OMPC_hint:
3690   case OMPC_dist_schedule:
3691   case OMPC_defaultmap:
3692   case OMPC_uniform:
3693   case OMPC_to:
3694   case OMPC_from:
3695   case OMPC_use_device_ptr:
3696   case OMPC_is_device_ptr:
3697     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3698   }
3699 }
3700 
3701 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3702   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3703   OpenMPClauseKind Kind = OMPC_unknown;
3704   for (auto *C : S.clauses()) {
3705     // Find first clause (skip seq_cst clause, if it is first).
3706     if (C->getClauseKind() != OMPC_seq_cst) {
3707       Kind = C->getClauseKind();
3708       break;
3709     }
3710   }
3711 
3712   const auto *CS =
3713       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3714   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3715     enterFullExpression(EWC);
3716   }
3717   // Processing for statements under 'atomic capture'.
3718   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3719     for (const auto *C : Compound->body()) {
3720       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3721         enterFullExpression(EWC);
3722       }
3723     }
3724   }
3725 
3726   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3727                                             PrePostActionTy &) {
3728     CGF.EmitStopPoint(CS);
3729     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3730                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3731                       S.isXLHSInRHSPart(), S.getLocStart());
3732   };
3733   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3734   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3735 }
3736 
3737 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3738                                          const OMPExecutableDirective &S,
3739                                          const RegionCodeGenTy &CodeGen) {
3740   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3741   CodeGenModule &CGM = CGF.CGM;
3742   const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target);
3743 
3744   llvm::Function *Fn = nullptr;
3745   llvm::Constant *FnID = nullptr;
3746 
3747   const Expr *IfCond = nullptr;
3748   // Check for the at most one if clause associated with the target region.
3749   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3750     if (C->getNameModifier() == OMPD_unknown ||
3751         C->getNameModifier() == OMPD_target) {
3752       IfCond = C->getCondition();
3753       break;
3754     }
3755   }
3756 
3757   // Check if we have any device clause associated with the directive.
3758   const Expr *Device = nullptr;
3759   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3760     Device = C->getDevice();
3761   }
3762 
3763   // Check if we have an if clause whose conditional always evaluates to false
3764   // or if we do not have any targets specified. If so the target region is not
3765   // an offload entry point.
3766   bool IsOffloadEntry = true;
3767   if (IfCond) {
3768     bool Val;
3769     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3770       IsOffloadEntry = false;
3771   }
3772   if (CGM.getLangOpts().OMPTargetTriples.empty())
3773     IsOffloadEntry = false;
3774 
3775   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3776   StringRef ParentName;
3777   // In case we have Ctors/Dtors we use the complete type variant to produce
3778   // the mangling of the device outlined kernel.
3779   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3780     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3781   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3782     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3783   else
3784     ParentName =
3785         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3786 
3787   // Emit target region as a standalone region.
3788   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3789                                                     IsOffloadEntry, CodeGen);
3790   OMPLexicalScope Scope(CGF, S);
3791   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3792   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3793   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3794                                         CapturedVars);
3795 }
3796 
3797 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3798                              PrePostActionTy &Action) {
3799   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3800   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3801   CGF.EmitOMPPrivateClause(S, PrivateScope);
3802   (void)PrivateScope.Privatize();
3803 
3804   Action.Enter(CGF);
3805   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3806 }
3807 
3808 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3809                                                   StringRef ParentName,
3810                                                   const OMPTargetDirective &S) {
3811   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3812     emitTargetRegion(CGF, S, Action);
3813   };
3814   llvm::Function *Fn;
3815   llvm::Constant *Addr;
3816   // Emit target region as a standalone region.
3817   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3818       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3819   assert(Fn && Addr && "Target device function emission failed.");
3820 }
3821 
3822 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3823   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3824     emitTargetRegion(CGF, S, Action);
3825   };
3826   emitCommonOMPTargetDirective(*this, S, CodeGen);
3827 }
3828 
3829 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3830                                         const OMPExecutableDirective &S,
3831                                         OpenMPDirectiveKind InnermostKind,
3832                                         const RegionCodeGenTy &CodeGen) {
3833   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3834   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3835       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3836 
3837   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3838   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3839   if (NT || TL) {
3840     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3841     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3842 
3843     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3844                                                   S.getLocStart());
3845   }
3846 
3847   OMPTeamsScope Scope(CGF, S);
3848   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3849   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3850   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3851                                            CapturedVars);
3852 }
3853 
3854 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3855   // Emit teams region as a standalone region.
3856   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3857     OMPPrivateScope PrivateScope(CGF);
3858     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3859     CGF.EmitOMPPrivateClause(S, PrivateScope);
3860     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3861     (void)PrivateScope.Privatize();
3862     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3863     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3864   };
3865   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
3866   emitPostUpdateForReductionClause(
3867       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3868 }
3869 
3870 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3871                                   const OMPTargetTeamsDirective &S) {
3872   auto *CS = S.getCapturedStmt(OMPD_teams);
3873   Action.Enter(CGF);
3874   // Emit teams region as a standalone region.
3875   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
3876     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3877     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3878     CGF.EmitOMPPrivateClause(S, PrivateScope);
3879     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3880     (void)PrivateScope.Privatize();
3881     Action.Enter(CGF);
3882     CGF.EmitStmt(CS->getCapturedStmt());
3883     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3884   };
3885   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3886   emitPostUpdateForReductionClause(
3887       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3888 }
3889 
3890 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3891     CodeGenModule &CGM, StringRef ParentName,
3892     const OMPTargetTeamsDirective &S) {
3893   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3894     emitTargetTeamsRegion(CGF, Action, S);
3895   };
3896   llvm::Function *Fn;
3897   llvm::Constant *Addr;
3898   // Emit target region as a standalone region.
3899   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3900       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3901   assert(Fn && Addr && "Target device function emission failed.");
3902 }
3903 
3904 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3905     const OMPTargetTeamsDirective &S) {
3906   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3907     emitTargetTeamsRegion(CGF, Action, S);
3908   };
3909   emitCommonOMPTargetDirective(*this, S, CodeGen);
3910 }
3911 
3912 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
3913     const OMPTeamsDistributeDirective &S) {
3914 
3915   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3916     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3917   };
3918 
3919   // Emit teams region as a standalone region.
3920   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3921                                             PrePostActionTy &) {
3922     OMPPrivateScope PrivateScope(CGF);
3923     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3924     (void)PrivateScope.Privatize();
3925     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3926                                                     CodeGenDistribute);
3927     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3928   };
3929   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
3930   emitPostUpdateForReductionClause(*this, S,
3931                                    [](CodeGenFunction &) { return nullptr; });
3932 }
3933 
3934 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
3935     const OMPTeamsDistributeParallelForDirective &S) {
3936   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3937     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3938                               S.getDistInc());
3939   };
3940 
3941   // Emit teams region as a standalone region.
3942   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3943                                             PrePostActionTy &) {
3944     OMPPrivateScope PrivateScope(CGF);
3945     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3946     (void)PrivateScope.Privatize();
3947     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3948                                                     CodeGenDistribute);
3949     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3950   };
3951   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
3952   emitPostUpdateForReductionClause(*this, S,
3953                                    [](CodeGenFunction &) { return nullptr; });
3954 }
3955 
3956 void CodeGenFunction::EmitOMPCancellationPointDirective(
3957     const OMPCancellationPointDirective &S) {
3958   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3959                                                    S.getCancelRegion());
3960 }
3961 
3962 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3963   const Expr *IfCond = nullptr;
3964   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3965     if (C->getNameModifier() == OMPD_unknown ||
3966         C->getNameModifier() == OMPD_cancel) {
3967       IfCond = C->getCondition();
3968       break;
3969     }
3970   }
3971   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3972                                         S.getCancelRegion());
3973 }
3974 
3975 CodeGenFunction::JumpDest
3976 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3977   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3978       Kind == OMPD_target_parallel)
3979     return ReturnBlock;
3980   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3981          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3982          Kind == OMPD_distribute_parallel_for ||
3983          Kind == OMPD_target_parallel_for ||
3984          Kind == OMPD_teams_distribute_parallel_for ||
3985          Kind == OMPD_target_teams_distribute_parallel_for);
3986   return OMPCancelStack.getExitBlock();
3987 }
3988 
3989 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3990     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3991     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3992   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3993   auto OrigVarIt = C.varlist_begin();
3994   auto InitIt = C.inits().begin();
3995   for (auto PvtVarIt : C.private_copies()) {
3996     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3997     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3998     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3999 
4000     // In order to identify the right initializer we need to match the
4001     // declaration used by the mapping logic. In some cases we may get
4002     // OMPCapturedExprDecl that refers to the original declaration.
4003     const ValueDecl *MatchingVD = OrigVD;
4004     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
4005       // OMPCapturedExprDecl are used to privative fields of the current
4006       // structure.
4007       auto *ME = cast<MemberExpr>(OED->getInit());
4008       assert(isa<CXXThisExpr>(ME->getBase()) &&
4009              "Base should be the current struct!");
4010       MatchingVD = ME->getMemberDecl();
4011     }
4012 
4013     // If we don't have information about the current list item, move on to
4014     // the next one.
4015     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
4016     if (InitAddrIt == CaptureDeviceAddrMap.end())
4017       continue;
4018 
4019     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
4020       // Initialize the temporary initialization variable with the address we
4021       // get from the runtime library. We have to cast the source address
4022       // because it is always a void *. References are materialized in the
4023       // privatization scope, so the initialization here disregards the fact
4024       // the original variable is a reference.
4025       QualType AddrQTy =
4026           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
4027       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
4028       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
4029       setAddrOfLocalVar(InitVD, InitAddr);
4030 
4031       // Emit private declaration, it will be initialized by the value we
4032       // declaration we just added to the local declarations map.
4033       EmitDecl(*PvtVD);
4034 
4035       // The initialization variables reached its purpose in the emission
4036       // ofthe previous declaration, so we don't need it anymore.
4037       LocalDeclMap.erase(InitVD);
4038 
4039       // Return the address of the private variable.
4040       return GetAddrOfLocalVar(PvtVD);
4041     });
4042     assert(IsRegistered && "firstprivate var already registered as private");
4043     // Silence the warning about unused variable.
4044     (void)IsRegistered;
4045 
4046     ++OrigVarIt;
4047     ++InitIt;
4048   }
4049 }
4050 
4051 // Generate the instructions for '#pragma omp target data' directive.
4052 void CodeGenFunction::EmitOMPTargetDataDirective(
4053     const OMPTargetDataDirective &S) {
4054   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
4055 
4056   // Create a pre/post action to signal the privatization of the device pointer.
4057   // This action can be replaced by the OpenMP runtime code generation to
4058   // deactivate privatization.
4059   bool PrivatizeDevicePointers = false;
4060   class DevicePointerPrivActionTy : public PrePostActionTy {
4061     bool &PrivatizeDevicePointers;
4062 
4063   public:
4064     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
4065         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
4066     void Enter(CodeGenFunction &CGF) override {
4067       PrivatizeDevicePointers = true;
4068     }
4069   };
4070   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
4071 
4072   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
4073       CodeGenFunction &CGF, PrePostActionTy &Action) {
4074     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4075       CGF.EmitStmt(
4076           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
4077     };
4078 
4079     // Codegen that selects wheather to generate the privatization code or not.
4080     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
4081                           &InnermostCodeGen](CodeGenFunction &CGF,
4082                                              PrePostActionTy &Action) {
4083       RegionCodeGenTy RCG(InnermostCodeGen);
4084       PrivatizeDevicePointers = false;
4085 
4086       // Call the pre-action to change the status of PrivatizeDevicePointers if
4087       // needed.
4088       Action.Enter(CGF);
4089 
4090       if (PrivatizeDevicePointers) {
4091         OMPPrivateScope PrivateScope(CGF);
4092         // Emit all instances of the use_device_ptr clause.
4093         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
4094           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
4095                                         Info.CaptureDeviceAddrMap);
4096         (void)PrivateScope.Privatize();
4097         RCG(CGF);
4098       } else
4099         RCG(CGF);
4100     };
4101 
4102     // Forward the provided action to the privatization codegen.
4103     RegionCodeGenTy PrivRCG(PrivCodeGen);
4104     PrivRCG.setAction(Action);
4105 
4106     // Notwithstanding the body of the region is emitted as inlined directive,
4107     // we don't use an inline scope as changes in the references inside the
4108     // region are expected to be visible outside, so we do not privative them.
4109     OMPLexicalScope Scope(CGF, S);
4110     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
4111                                                     PrivRCG);
4112   };
4113 
4114   RegionCodeGenTy RCG(CodeGen);
4115 
4116   // If we don't have target devices, don't bother emitting the data mapping
4117   // code.
4118   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
4119     RCG(*this);
4120     return;
4121   }
4122 
4123   // Check if we have any if clause associated with the directive.
4124   const Expr *IfCond = nullptr;
4125   if (auto *C = S.getSingleClause<OMPIfClause>())
4126     IfCond = C->getCondition();
4127 
4128   // Check if we have any device clause associated with the directive.
4129   const Expr *Device = nullptr;
4130   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4131     Device = C->getDevice();
4132 
4133   // Set the action to signal privatization of device pointers.
4134   RCG.setAction(PrivAction);
4135 
4136   // Emit region code.
4137   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4138                                              Info);
4139 }
4140 
4141 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4142     const OMPTargetEnterDataDirective &S) {
4143   // If we don't have target devices, don't bother emitting the data mapping
4144   // code.
4145   if (CGM.getLangOpts().OMPTargetTriples.empty())
4146     return;
4147 
4148   // Check if we have any if clause associated with the directive.
4149   const Expr *IfCond = nullptr;
4150   if (auto *C = S.getSingleClause<OMPIfClause>())
4151     IfCond = C->getCondition();
4152 
4153   // Check if we have any device clause associated with the directive.
4154   const Expr *Device = nullptr;
4155   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4156     Device = C->getDevice();
4157 
4158   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4159                                         PrePostActionTy &) {
4160     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4161                                                             Device);
4162   };
4163   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4164   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data,
4165                                               CodeGen);
4166 }
4167 
4168 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4169     const OMPTargetExitDataDirective &S) {
4170   // If we don't have target devices, don't bother emitting the data mapping
4171   // code.
4172   if (CGM.getLangOpts().OMPTargetTriples.empty())
4173     return;
4174 
4175   // Check if we have any if clause associated with the directive.
4176   const Expr *IfCond = nullptr;
4177   if (auto *C = S.getSingleClause<OMPIfClause>())
4178     IfCond = C->getCondition();
4179 
4180   // Check if we have any device clause associated with the directive.
4181   const Expr *Device = nullptr;
4182   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4183     Device = C->getDevice();
4184 
4185   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4186                                         PrePostActionTy &) {
4187     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4188                                                             Device);
4189   };
4190   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4191   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data,
4192                                               CodeGen);
4193 }
4194 
4195 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4196                                      const OMPTargetParallelDirective &S,
4197                                      PrePostActionTy &Action) {
4198   // Get the captured statement associated with the 'parallel' region.
4199   auto *CS = S.getCapturedStmt(OMPD_parallel);
4200   Action.Enter(CGF);
4201   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4202     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4203     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4204     CGF.EmitOMPPrivateClause(S, PrivateScope);
4205     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4206     (void)PrivateScope.Privatize();
4207     // TODO: Add support for clauses.
4208     CGF.EmitStmt(CS->getCapturedStmt());
4209     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4210   };
4211   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4212                                  emitEmptyBoundParameters);
4213   emitPostUpdateForReductionClause(
4214       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4215 }
4216 
4217 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4218     CodeGenModule &CGM, StringRef ParentName,
4219     const OMPTargetParallelDirective &S) {
4220   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4221     emitTargetParallelRegion(CGF, S, Action);
4222   };
4223   llvm::Function *Fn;
4224   llvm::Constant *Addr;
4225   // Emit target region as a standalone region.
4226   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4227       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4228   assert(Fn && Addr && "Target device function emission failed.");
4229 }
4230 
4231 void CodeGenFunction::EmitOMPTargetParallelDirective(
4232     const OMPTargetParallelDirective &S) {
4233   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4234     emitTargetParallelRegion(CGF, S, Action);
4235   };
4236   emitCommonOMPTargetDirective(*this, S, CodeGen);
4237 }
4238 
4239 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4240                                         const OMPTargetParallelForDirective &S,
4241                                         PrePostActionTy &Action) {
4242   Action.Enter(CGF);
4243   // Emit directive as a combined directive that consists of two implicit
4244   // directives: 'parallel' with 'for' directive.
4245   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4246     CodeGenFunction::OMPCancelStackRAII CancelRegion(
4247         CGF, OMPD_target_parallel_for, S.hasCancel());
4248     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4249                                emitDispatchForLoopBounds);
4250   };
4251   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4252                                  emitEmptyBoundParameters);
4253 }
4254 
4255 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4256     CodeGenModule &CGM, StringRef ParentName,
4257     const OMPTargetParallelForDirective &S) {
4258   // Emit SPMD target parallel for region as a standalone region.
4259   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4260     emitTargetParallelForRegion(CGF, S, Action);
4261   };
4262   llvm::Function *Fn;
4263   llvm::Constant *Addr;
4264   // Emit target region as a standalone region.
4265   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4266       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4267   assert(Fn && Addr && "Target device function emission failed.");
4268 }
4269 
4270 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4271     const OMPTargetParallelForDirective &S) {
4272   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4273     emitTargetParallelForRegion(CGF, S, Action);
4274   };
4275   emitCommonOMPTargetDirective(*this, S, CodeGen);
4276 }
4277 
4278 static void
4279 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4280                                 const OMPTargetParallelForSimdDirective &S,
4281                                 PrePostActionTy &Action) {
4282   Action.Enter(CGF);
4283   // Emit directive as a combined directive that consists of two implicit
4284   // directives: 'parallel' with 'for' directive.
4285   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4286     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4287                                emitDispatchForLoopBounds);
4288   };
4289   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4290                                  emitEmptyBoundParameters);
4291 }
4292 
4293 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4294     CodeGenModule &CGM, StringRef ParentName,
4295     const OMPTargetParallelForSimdDirective &S) {
4296   // Emit SPMD target parallel for region as a standalone region.
4297   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4298     emitTargetParallelForSimdRegion(CGF, S, Action);
4299   };
4300   llvm::Function *Fn;
4301   llvm::Constant *Addr;
4302   // Emit target region as a standalone region.
4303   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4304       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4305   assert(Fn && Addr && "Target device function emission failed.");
4306 }
4307 
4308 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4309     const OMPTargetParallelForSimdDirective &S) {
4310   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4311     emitTargetParallelForSimdRegion(CGF, S, Action);
4312   };
4313   emitCommonOMPTargetDirective(*this, S, CodeGen);
4314 }
4315 
4316 /// Emit a helper variable and return corresponding lvalue.
4317 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4318                      const ImplicitParamDecl *PVD,
4319                      CodeGenFunction::OMPPrivateScope &Privates) {
4320   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4321   Privates.addPrivate(
4322       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4323 }
4324 
4325 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4326   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4327   // Emit outlined function for task construct.
4328   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
4329   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4330   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4331   const Expr *IfCond = nullptr;
4332   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4333     if (C->getNameModifier() == OMPD_unknown ||
4334         C->getNameModifier() == OMPD_taskloop) {
4335       IfCond = C->getCondition();
4336       break;
4337     }
4338   }
4339 
4340   OMPTaskDataTy Data;
4341   // Check if taskloop must be emitted without taskgroup.
4342   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4343   // TODO: Check if we should emit tied or untied task.
4344   Data.Tied = true;
4345   // Set scheduling for taskloop
4346   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4347     // grainsize clause
4348     Data.Schedule.setInt(/*IntVal=*/false);
4349     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4350   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4351     // num_tasks clause
4352     Data.Schedule.setInt(/*IntVal=*/true);
4353     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4354   }
4355 
4356   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4357     // if (PreCond) {
4358     //   for (IV in 0..LastIteration) BODY;
4359     //   <Final counter/linear vars updates>;
4360     // }
4361     //
4362 
4363     // Emit: if (PreCond) - begin.
4364     // If the condition constant folds and can be elided, avoid emitting the
4365     // whole loop.
4366     bool CondConstant;
4367     llvm::BasicBlock *ContBlock = nullptr;
4368     OMPLoopScope PreInitScope(CGF, S);
4369     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4370       if (!CondConstant)
4371         return;
4372     } else {
4373       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4374       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4375       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4376                   CGF.getProfileCount(&S));
4377       CGF.EmitBlock(ThenBlock);
4378       CGF.incrementProfileCounter(&S);
4379     }
4380 
4381     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4382       CGF.EmitOMPSimdInit(S);
4383 
4384     OMPPrivateScope LoopScope(CGF);
4385     // Emit helper vars inits.
4386     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4387     auto *I = CS->getCapturedDecl()->param_begin();
4388     auto *LBP = std::next(I, LowerBound);
4389     auto *UBP = std::next(I, UpperBound);
4390     auto *STP = std::next(I, Stride);
4391     auto *LIP = std::next(I, LastIter);
4392     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4393              LoopScope);
4394     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4395              LoopScope);
4396     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4397     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4398              LoopScope);
4399     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4400     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4401     (void)LoopScope.Privatize();
4402     // Emit the loop iteration variable.
4403     const Expr *IVExpr = S.getIterationVariable();
4404     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4405     CGF.EmitVarDecl(*IVDecl);
4406     CGF.EmitIgnoredExpr(S.getInit());
4407 
4408     // Emit the iterations count variable.
4409     // If it is not a variable, Sema decided to calculate iterations count on
4410     // each iteration (e.g., it is foldable into a constant).
4411     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4412       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4413       // Emit calculation of the iterations count.
4414       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4415     }
4416 
4417     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4418                          S.getInc(),
4419                          [&S](CodeGenFunction &CGF) {
4420                            CGF.EmitOMPLoopBody(S, JumpDest());
4421                            CGF.EmitStopPoint(&S);
4422                          },
4423                          [](CodeGenFunction &) {});
4424     // Emit: if (PreCond) - end.
4425     if (ContBlock) {
4426       CGF.EmitBranch(ContBlock);
4427       CGF.EmitBlock(ContBlock, true);
4428     }
4429     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4430     if (HasLastprivateClause) {
4431       CGF.EmitOMPLastprivateClauseFinal(
4432           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4433           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4434               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4435               (*LIP)->getType(), S.getLocStart())));
4436     }
4437   };
4438   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4439                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4440                             const OMPTaskDataTy &Data) {
4441     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4442       OMPLoopScope PreInitScope(CGF, S);
4443       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4444                                                   OutlinedFn, SharedsTy,
4445                                                   CapturedStruct, IfCond, Data);
4446     };
4447     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4448                                                     CodeGen);
4449   };
4450   if (Data.Nogroup)
4451     EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4452   else {
4453     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4454         *this,
4455         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4456                                         PrePostActionTy &Action) {
4457           Action.Enter(CGF);
4458           CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4459         },
4460         S.getLocStart());
4461   }
4462 }
4463 
4464 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4465   EmitOMPTaskLoopBasedDirective(S);
4466 }
4467 
4468 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4469     const OMPTaskLoopSimdDirective &S) {
4470   EmitOMPTaskLoopBasedDirective(S);
4471 }
4472 
4473 // Generate the instructions for '#pragma omp target update' directive.
4474 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4475     const OMPTargetUpdateDirective &S) {
4476   // If we don't have target devices, don't bother emitting the data mapping
4477   // code.
4478   if (CGM.getLangOpts().OMPTargetTriples.empty())
4479     return;
4480 
4481   // Check if we have any if clause associated with the directive.
4482   const Expr *IfCond = nullptr;
4483   if (auto *C = S.getSingleClause<OMPIfClause>())
4484     IfCond = C->getCondition();
4485 
4486   // Check if we have any device clause associated with the directive.
4487   const Expr *Device = nullptr;
4488   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4489     Device = C->getDevice();
4490 
4491   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4492                                         PrePostActionTy &) {
4493     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4494                                                             Device);
4495   };
4496   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4497   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update,
4498                                               CodeGen);
4499 }
4500