1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             assert(VD == VD->getCanonicalDecl() &&
69                         "Canonical decl must be captured.");
70             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
71                             isCapturedVar(CGF, VD) ||
72                                 (CGF.CapturedStmtInfo &&
73                                  InlinedShareds.isGlobalVarCaptured(VD)),
74                             VD->getType().getNonReferenceType(), VK_LValue,
75                             SourceLocation());
76             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
77               return CGF.EmitLValue(&DRE).getAddress();
78             });
79           }
80         }
81         (void)InlinedShareds.Privatize();
82       }
83     }
84   }
85 };
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S,
100                         /*AsInlined=*/false,
101                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S,
116                         /*AsInlined=*/false,
117                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 };
119 
120 /// Private scope for OpenMP loop-based directives, that supports capturing
121 /// of used expression from loop statement.
122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
123   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
124     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
125       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
126         for (const auto *I : PreInits->decls())
127           CGF.EmitVarDecl(cast<VarDecl>(*I));
128       }
129     }
130   }
131 
132 public:
133   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
134       : CodeGenFunction::RunCleanupsScope(CGF) {
135     emitPreInitStmt(CGF, S);
136   }
137 };
138 
139 } // namespace
140 
141 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
142                                          const OMPExecutableDirective &S,
143                                          const RegionCodeGenTy &CodeGen);
144 
145 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
146   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
147     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
148       OrigVD = OrigVD->getCanonicalDecl();
149       bool IsCaptured =
150           LambdaCaptureFields.lookup(OrigVD) ||
151           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
152           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
153       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
154                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
155       return EmitLValue(&DRE);
156     }
157   }
158   return EmitLValue(E);
159 }
160 
161 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
162   auto &C = getContext();
163   llvm::Value *Size = nullptr;
164   auto SizeInChars = C.getTypeSizeInChars(Ty);
165   if (SizeInChars.isZero()) {
166     // getTypeSizeInChars() returns 0 for a VLA.
167     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
168       llvm::Value *ArraySize;
169       std::tie(ArraySize, Ty) = getVLASize(VAT);
170       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
171     }
172     SizeInChars = C.getTypeSizeInChars(Ty);
173     if (SizeInChars.isZero())
174       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
175     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
176   } else
177     Size = CGM.getSize(SizeInChars);
178   return Size;
179 }
180 
181 void CodeGenFunction::GenerateOpenMPCapturedVars(
182     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
183   const RecordDecl *RD = S.getCapturedRecordDecl();
184   auto CurField = RD->field_begin();
185   auto CurCap = S.captures().begin();
186   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
187                                                  E = S.capture_init_end();
188        I != E; ++I, ++CurField, ++CurCap) {
189     if (CurField->hasCapturedVLAType()) {
190       auto VAT = CurField->getCapturedVLAType();
191       auto *Val = VLASizeMap[VAT->getSizeExpr()];
192       CapturedVars.push_back(Val);
193     } else if (CurCap->capturesThis())
194       CapturedVars.push_back(CXXThisValue);
195     else if (CurCap->capturesVariableByCopy()) {
196       llvm::Value *CV =
197           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
198 
199       // If the field is not a pointer, we need to save the actual value
200       // and load it as a void pointer.
201       if (!CurField->getType()->isAnyPointerType()) {
202         auto &Ctx = getContext();
203         auto DstAddr = CreateMemTemp(
204             Ctx.getUIntPtrType(),
205             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
206         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
207 
208         auto *SrcAddrVal = EmitScalarConversion(
209             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
210             Ctx.getPointerType(CurField->getType()), SourceLocation());
211         LValue SrcLV =
212             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
213 
214         // Store the value using the source type pointer.
215         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
216 
217         // Load the value using the destination type pointer.
218         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
219       }
220       CapturedVars.push_back(CV);
221     } else {
222       assert(CurCap->capturesVariable() && "Expected capture by reference.");
223       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
224     }
225   }
226 }
227 
228 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
229                                     StringRef Name, LValue AddrLV,
230                                     bool isReferenceType = false) {
231   ASTContext &Ctx = CGF.getContext();
232 
233   auto *CastedPtr = CGF.EmitScalarConversion(
234       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
235       Ctx.getPointerType(DstType), SourceLocation());
236   auto TmpAddr =
237       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
238           .getAddress();
239 
240   // If we are dealing with references we need to return the address of the
241   // reference instead of the reference of the value.
242   if (isReferenceType) {
243     QualType RefType = Ctx.getLValueReferenceType(DstType);
244     auto *RefVal = TmpAddr.getPointer();
245     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
246     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
247     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
248   }
249 
250   return TmpAddr;
251 }
252 
253 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
254   if (T->isLValueReferenceType()) {
255     return C.getLValueReferenceType(
256         getCanonicalParamType(C, T.getNonReferenceType()),
257         /*SpelledAsLValue=*/false);
258   }
259   if (T->isPointerType())
260     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
261   if (auto *A = T->getAsArrayTypeUnsafe()) {
262     if (auto *VLA = dyn_cast<VariableArrayType>(A))
263       return getCanonicalParamType(C, VLA->getElementType());
264     else if (!A->isVariablyModifiedType())
265       return C.getCanonicalType(T);
266   }
267   return C.getCanonicalParamType(T);
268 }
269 
270 namespace {
271   /// Contains required data for proper outlined function codegen.
272   struct FunctionOptions {
273     /// Captured statement for which the function is generated.
274     const CapturedStmt *S = nullptr;
275     /// true if cast to/from  UIntPtr is required for variables captured by
276     /// value.
277     const bool UIntPtrCastRequired = true;
278     /// true if only casted arguments must be registered as local args or VLA
279     /// sizes.
280     const bool RegisterCastedArgsOnly = false;
281     /// Name of the generated function.
282     const StringRef FunctionName;
283     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
284                              bool RegisterCastedArgsOnly,
285                              StringRef FunctionName)
286         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
287           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
288           FunctionName(FunctionName) {}
289   };
290 }
291 
292 static llvm::Function *emitOutlinedFunctionPrologue(
293     CodeGenFunction &CGF, FunctionArgList &Args,
294     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
295         &LocalAddrs,
296     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
297         &VLASizes,
298     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
299   const CapturedDecl *CD = FO.S->getCapturedDecl();
300   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
301   assert(CD->hasBody() && "missing CapturedDecl body");
302 
303   CXXThisValue = nullptr;
304   // Build the argument list.
305   CodeGenModule &CGM = CGF.CGM;
306   ASTContext &Ctx = CGM.getContext();
307   FunctionArgList TargetArgs;
308   Args.append(CD->param_begin(),
309               std::next(CD->param_begin(), CD->getContextParamPosition()));
310   TargetArgs.append(
311       CD->param_begin(),
312       std::next(CD->param_begin(), CD->getContextParamPosition()));
313   auto I = FO.S->captures().begin();
314   FunctionDecl *DebugFunctionDecl = nullptr;
315   if (!FO.UIntPtrCastRequired) {
316     FunctionProtoType::ExtProtoInfo EPI;
317     DebugFunctionDecl = FunctionDecl::Create(
318         Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
319         SourceLocation(), DeclarationName(), Ctx.VoidTy,
320         Ctx.getTrivialTypeSourceInfo(
321             Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
322         SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
323   }
324   for (auto *FD : RD->fields()) {
325     QualType ArgType = FD->getType();
326     IdentifierInfo *II = nullptr;
327     VarDecl *CapVar = nullptr;
328 
329     // If this is a capture by copy and the type is not a pointer, the outlined
330     // function argument type should be uintptr and the value properly casted to
331     // uintptr. This is necessary given that the runtime library is only able to
332     // deal with pointers. We can pass in the same way the VLA type sizes to the
333     // outlined function.
334     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
335         I->capturesVariableArrayType()) {
336       if (FO.UIntPtrCastRequired)
337         ArgType = Ctx.getUIntPtrType();
338     }
339 
340     if (I->capturesVariable() || I->capturesVariableByCopy()) {
341       CapVar = I->getCapturedVar();
342       II = CapVar->getIdentifier();
343     } else if (I->capturesThis())
344       II = &Ctx.Idents.get("this");
345     else {
346       assert(I->capturesVariableArrayType());
347       II = &Ctx.Idents.get("vla");
348     }
349     if (ArgType->isVariablyModifiedType())
350       ArgType = getCanonicalParamType(Ctx, ArgType);
351     VarDecl *Arg;
352     if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
353       Arg = ParmVarDecl::Create(
354           Ctx, DebugFunctionDecl,
355           CapVar ? CapVar->getLocStart() : FD->getLocStart(),
356           CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
357           /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
358     } else {
359       Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
360                                       II, ArgType, ImplicitParamDecl::Other);
361     }
362     Args.emplace_back(Arg);
363     // Do not cast arguments if we emit function with non-original types.
364     TargetArgs.emplace_back(
365         FO.UIntPtrCastRequired
366             ? Arg
367             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
368     ++I;
369   }
370   Args.append(
371       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
372       CD->param_end());
373   TargetArgs.append(
374       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
375       CD->param_end());
376 
377   // Create the function declaration.
378   const CGFunctionInfo &FuncInfo =
379       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
380   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
381 
382   llvm::Function *F =
383       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
384                              FO.FunctionName, &CGM.getModule());
385   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
386   if (CD->isNothrow())
387     F->setDoesNotThrow();
388 
389   // Generate the function.
390   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
391                     FO.S->getLocStart(), CD->getBody()->getLocStart());
392   unsigned Cnt = CD->getContextParamPosition();
393   I = FO.S->captures().begin();
394   for (auto *FD : RD->fields()) {
395     // Do not map arguments if we emit function with non-original types.
396     Address LocalAddr(Address::invalid());
397     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
398       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
399                                                              TargetArgs[Cnt]);
400     } else {
401       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
402     }
403     // If we are capturing a pointer by copy we don't need to do anything, just
404     // use the value that we get from the arguments.
405     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
406       const VarDecl *CurVD = I->getCapturedVar();
407       // If the variable is a reference we need to materialize it here.
408       if (CurVD->getType()->isReferenceType()) {
409         Address RefAddr = CGF.CreateMemTemp(
410             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
411         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
412                               /*Volatile=*/false, CurVD->getType());
413         LocalAddr = RefAddr;
414       }
415       if (!FO.RegisterCastedArgsOnly)
416         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
417       ++Cnt;
418       ++I;
419       continue;
420     }
421 
422     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
423                                         AlignmentSource::Decl);
424     if (FD->hasCapturedVLAType()) {
425       if (FO.UIntPtrCastRequired) {
426         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
427                                                           Args[Cnt]->getName(),
428                                                           ArgLVal),
429                                      FD->getType(), AlignmentSource::Decl);
430       }
431       auto *ExprArg =
432           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
433       auto VAT = FD->getCapturedVLAType();
434       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
435     } else if (I->capturesVariable()) {
436       auto *Var = I->getCapturedVar();
437       QualType VarTy = Var->getType();
438       Address ArgAddr = ArgLVal.getAddress();
439       if (!VarTy->isReferenceType()) {
440         if (ArgLVal.getType()->isLValueReferenceType()) {
441           ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
442         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
443           assert(ArgLVal.getType()->isPointerType());
444           ArgAddr = CGF.EmitLoadOfPointer(
445               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
446         }
447       }
448       if (!FO.RegisterCastedArgsOnly) {
449         LocalAddrs.insert(
450             {Args[Cnt],
451              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
452       }
453     } else if (I->capturesVariableByCopy()) {
454       assert(!FD->getType()->isAnyPointerType() &&
455              "Not expecting a captured pointer.");
456       auto *Var = I->getCapturedVar();
457       QualType VarTy = Var->getType();
458       LocalAddrs.insert(
459           {Args[Cnt],
460            {Var,
461             FO.UIntPtrCastRequired
462                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
463                                        ArgLVal, VarTy->isReferenceType())
464                 : ArgLVal.getAddress()}});
465     } else {
466       // If 'this' is captured, load it into CXXThisValue.
467       assert(I->capturesThis());
468       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
469                          .getScalarVal();
470       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
471     }
472     ++Cnt;
473     ++I;
474   }
475 
476   return F;
477 }
478 
479 llvm::Function *
480 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
481   assert(
482       CapturedStmtInfo &&
483       "CapturedStmtInfo should be set when generating the captured function");
484   const CapturedDecl *CD = S.getCapturedDecl();
485   // Build the argument list.
486   bool NeedWrapperFunction =
487       getDebugInfo() &&
488       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
489   FunctionArgList Args;
490   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
491   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
492   SmallString<256> Buffer;
493   llvm::raw_svector_ostream Out(Buffer);
494   Out << CapturedStmtInfo->getHelperName();
495   if (NeedWrapperFunction)
496     Out << "_debug__";
497   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
498                      Out.str());
499   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
500                                                    VLASizes, CXXThisValue, FO);
501   for (const auto &LocalAddrPair : LocalAddrs) {
502     if (LocalAddrPair.second.first) {
503       setAddrOfLocalVar(LocalAddrPair.second.first,
504                         LocalAddrPair.second.second);
505     }
506   }
507   for (const auto &VLASizePair : VLASizes)
508     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
509   PGO.assignRegionCounters(GlobalDecl(CD), F);
510   CapturedStmtInfo->EmitBody(*this, CD->getBody());
511   FinishFunction(CD->getBodyRBrace());
512   if (!NeedWrapperFunction)
513     return F;
514 
515   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
516                             /*RegisterCastedArgsOnly=*/true,
517                             CapturedStmtInfo->getHelperName());
518   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
519   Args.clear();
520   LocalAddrs.clear();
521   VLASizes.clear();
522   llvm::Function *WrapperF =
523       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
524                                    WrapperCGF.CXXThisValue, WrapperFO);
525   llvm::SmallVector<llvm::Value *, 4> CallArgs;
526   for (const auto *Arg : Args) {
527     llvm::Value *CallArg;
528     auto I = LocalAddrs.find(Arg);
529     if (I != LocalAddrs.end()) {
530       LValue LV = WrapperCGF.MakeAddrLValue(
531           I->second.second,
532           I->second.first ? I->second.first->getType() : Arg->getType(),
533           AlignmentSource::Decl);
534       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
535     } else {
536       auto EI = VLASizes.find(Arg);
537       if (EI != VLASizes.end())
538         CallArg = EI->second.second;
539       else {
540         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
541                                               Arg->getType(),
542                                               AlignmentSource::Decl);
543         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
544       }
545     }
546     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
547   }
548   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
549                                                   F, CallArgs);
550   WrapperCGF.FinishFunction();
551   return WrapperF;
552 }
553 
554 //===----------------------------------------------------------------------===//
555 //                              OpenMP Directive Emission
556 //===----------------------------------------------------------------------===//
557 void CodeGenFunction::EmitOMPAggregateAssign(
558     Address DestAddr, Address SrcAddr, QualType OriginalType,
559     const llvm::function_ref<void(Address, Address)> &CopyGen) {
560   // Perform element-by-element initialization.
561   QualType ElementTy;
562 
563   // Drill down to the base element type on both arrays.
564   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
565   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
566   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
567 
568   auto SrcBegin = SrcAddr.getPointer();
569   auto DestBegin = DestAddr.getPointer();
570   // Cast from pointer to array type to pointer to single element.
571   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
572   // The basic structure here is a while-do loop.
573   auto BodyBB = createBasicBlock("omp.arraycpy.body");
574   auto DoneBB = createBasicBlock("omp.arraycpy.done");
575   auto IsEmpty =
576       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
577   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
578 
579   // Enter the loop body, making that address the current address.
580   auto EntryBB = Builder.GetInsertBlock();
581   EmitBlock(BodyBB);
582 
583   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
584 
585   llvm::PHINode *SrcElementPHI =
586     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
587   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
588   Address SrcElementCurrent =
589       Address(SrcElementPHI,
590               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
591 
592   llvm::PHINode *DestElementPHI =
593     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
594   DestElementPHI->addIncoming(DestBegin, EntryBB);
595   Address DestElementCurrent =
596     Address(DestElementPHI,
597             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
598 
599   // Emit copy.
600   CopyGen(DestElementCurrent, SrcElementCurrent);
601 
602   // Shift the address forward by one element.
603   auto DestElementNext = Builder.CreateConstGEP1_32(
604       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
605   auto SrcElementNext = Builder.CreateConstGEP1_32(
606       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
607   // Check whether we've reached the end.
608   auto Done =
609       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
610   Builder.CreateCondBr(Done, DoneBB, BodyBB);
611   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
612   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
613 
614   // Done.
615   EmitBlock(DoneBB, /*IsFinished=*/true);
616 }
617 
618 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
619                                   Address SrcAddr, const VarDecl *DestVD,
620                                   const VarDecl *SrcVD, const Expr *Copy) {
621   if (OriginalType->isArrayType()) {
622     auto *BO = dyn_cast<BinaryOperator>(Copy);
623     if (BO && BO->getOpcode() == BO_Assign) {
624       // Perform simple memcpy for simple copying.
625       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
626     } else {
627       // For arrays with complex element types perform element by element
628       // copying.
629       EmitOMPAggregateAssign(
630           DestAddr, SrcAddr, OriginalType,
631           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
632             // Working with the single array element, so have to remap
633             // destination and source variables to corresponding array
634             // elements.
635             CodeGenFunction::OMPPrivateScope Remap(*this);
636             Remap.addPrivate(DestVD, [DestElement]() -> Address {
637               return DestElement;
638             });
639             Remap.addPrivate(
640                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
641             (void)Remap.Privatize();
642             EmitIgnoredExpr(Copy);
643           });
644     }
645   } else {
646     // Remap pseudo source variable to private copy.
647     CodeGenFunction::OMPPrivateScope Remap(*this);
648     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
649     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
650     (void)Remap.Privatize();
651     // Emit copying of the whole variable.
652     EmitIgnoredExpr(Copy);
653   }
654 }
655 
656 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
657                                                 OMPPrivateScope &PrivateScope) {
658   if (!HaveInsertPoint())
659     return false;
660   bool FirstprivateIsLastprivate = false;
661   llvm::DenseSet<const VarDecl *> Lastprivates;
662   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
663     for (const auto *D : C->varlists())
664       Lastprivates.insert(
665           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
666   }
667   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
668   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
669   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
670     auto IRef = C->varlist_begin();
671     auto InitsRef = C->inits().begin();
672     for (auto IInit : C->private_copies()) {
673       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
674       bool ThisFirstprivateIsLastprivate =
675           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
676       auto *CapFD = CapturesInfo.lookup(OrigVD);
677       auto *FD = CapturedStmtInfo->lookup(OrigVD);
678       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
679           !FD->getType()->isReferenceType()) {
680         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
681         ++IRef;
682         ++InitsRef;
683         continue;
684       }
685       FirstprivateIsLastprivate =
686           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
687       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
688         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
689         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
690         bool IsRegistered;
691         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
692                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
693                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
694         Address OriginalAddr = EmitLValue(&DRE).getAddress();
695         QualType Type = VD->getType();
696         if (Type->isArrayType()) {
697           // Emit VarDecl with copy init for arrays.
698           // Get the address of the original variable captured in current
699           // captured region.
700           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
701             auto Emission = EmitAutoVarAlloca(*VD);
702             auto *Init = VD->getInit();
703             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
704               // Perform simple memcpy.
705               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
706                                   Type);
707             } else {
708               EmitOMPAggregateAssign(
709                   Emission.getAllocatedAddress(), OriginalAddr, Type,
710                   [this, VDInit, Init](Address DestElement,
711                                        Address SrcElement) {
712                     // Clean up any temporaries needed by the initialization.
713                     RunCleanupsScope InitScope(*this);
714                     // Emit initialization for single element.
715                     setAddrOfLocalVar(VDInit, SrcElement);
716                     EmitAnyExprToMem(Init, DestElement,
717                                      Init->getType().getQualifiers(),
718                                      /*IsInitializer*/ false);
719                     LocalDeclMap.erase(VDInit);
720                   });
721             }
722             EmitAutoVarCleanups(Emission);
723             return Emission.getAllocatedAddress();
724           });
725         } else {
726           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
727             // Emit private VarDecl with copy init.
728             // Remap temp VDInit variable to the address of the original
729             // variable
730             // (for proper handling of captured global variables).
731             setAddrOfLocalVar(VDInit, OriginalAddr);
732             EmitDecl(*VD);
733             LocalDeclMap.erase(VDInit);
734             return GetAddrOfLocalVar(VD);
735           });
736         }
737         assert(IsRegistered &&
738                "firstprivate var already registered as private");
739         // Silence the warning about unused variable.
740         (void)IsRegistered;
741       }
742       ++IRef;
743       ++InitsRef;
744     }
745   }
746   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
747 }
748 
749 void CodeGenFunction::EmitOMPPrivateClause(
750     const OMPExecutableDirective &D,
751     CodeGenFunction::OMPPrivateScope &PrivateScope) {
752   if (!HaveInsertPoint())
753     return;
754   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
755   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
756     auto IRef = C->varlist_begin();
757     for (auto IInit : C->private_copies()) {
758       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
759       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
760         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
761         bool IsRegistered =
762             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
763               // Emit private VarDecl with copy init.
764               EmitDecl(*VD);
765               return GetAddrOfLocalVar(VD);
766             });
767         assert(IsRegistered && "private var already registered as private");
768         // Silence the warning about unused variable.
769         (void)IsRegistered;
770       }
771       ++IRef;
772     }
773   }
774 }
775 
776 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
777   if (!HaveInsertPoint())
778     return false;
779   // threadprivate_var1 = master_threadprivate_var1;
780   // operator=(threadprivate_var2, master_threadprivate_var2);
781   // ...
782   // __kmpc_barrier(&loc, global_tid);
783   llvm::DenseSet<const VarDecl *> CopiedVars;
784   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
785   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
786     auto IRef = C->varlist_begin();
787     auto ISrcRef = C->source_exprs().begin();
788     auto IDestRef = C->destination_exprs().begin();
789     for (auto *AssignOp : C->assignment_ops()) {
790       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
791       QualType Type = VD->getType();
792       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
793         // Get the address of the master variable. If we are emitting code with
794         // TLS support, the address is passed from the master as field in the
795         // captured declaration.
796         Address MasterAddr = Address::invalid();
797         if (getLangOpts().OpenMPUseTLS &&
798             getContext().getTargetInfo().isTLSSupported()) {
799           assert(CapturedStmtInfo->lookup(VD) &&
800                  "Copyin threadprivates should have been captured!");
801           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
802                           VK_LValue, (*IRef)->getExprLoc());
803           MasterAddr = EmitLValue(&DRE).getAddress();
804           LocalDeclMap.erase(VD);
805         } else {
806           MasterAddr =
807             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
808                                         : CGM.GetAddrOfGlobal(VD),
809                     getContext().getDeclAlign(VD));
810         }
811         // Get the address of the threadprivate variable.
812         Address PrivateAddr = EmitLValue(*IRef).getAddress();
813         if (CopiedVars.size() == 1) {
814           // At first check if current thread is a master thread. If it is, no
815           // need to copy data.
816           CopyBegin = createBasicBlock("copyin.not.master");
817           CopyEnd = createBasicBlock("copyin.not.master.end");
818           Builder.CreateCondBr(
819               Builder.CreateICmpNE(
820                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
821                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
822               CopyBegin, CopyEnd);
823           EmitBlock(CopyBegin);
824         }
825         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
826         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
827         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
828       }
829       ++IRef;
830       ++ISrcRef;
831       ++IDestRef;
832     }
833   }
834   if (CopyEnd) {
835     // Exit out of copying procedure for non-master thread.
836     EmitBlock(CopyEnd, /*IsFinished=*/true);
837     return true;
838   }
839   return false;
840 }
841 
842 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
843     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
844   if (!HaveInsertPoint())
845     return false;
846   bool HasAtLeastOneLastprivate = false;
847   llvm::DenseSet<const VarDecl *> SIMDLCVs;
848   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
849     auto *LoopDirective = cast<OMPLoopDirective>(&D);
850     for (auto *C : LoopDirective->counters()) {
851       SIMDLCVs.insert(
852           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
853     }
854   }
855   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
856   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
857     HasAtLeastOneLastprivate = true;
858     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
859       break;
860     auto IRef = C->varlist_begin();
861     auto IDestRef = C->destination_exprs().begin();
862     for (auto *IInit : C->private_copies()) {
863       // Keep the address of the original variable for future update at the end
864       // of the loop.
865       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
866       // Taskloops do not require additional initialization, it is done in
867       // runtime support library.
868       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
869         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
870         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
871           DeclRefExpr DRE(
872               const_cast<VarDecl *>(OrigVD),
873               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
874                   OrigVD) != nullptr,
875               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
876           return EmitLValue(&DRE).getAddress();
877         });
878         // Check if the variable is also a firstprivate: in this case IInit is
879         // not generated. Initialization of this variable will happen in codegen
880         // for 'firstprivate' clause.
881         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
882           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
883           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
884             // Emit private VarDecl with copy init.
885             EmitDecl(*VD);
886             return GetAddrOfLocalVar(VD);
887           });
888           assert(IsRegistered &&
889                  "lastprivate var already registered as private");
890           (void)IsRegistered;
891         }
892       }
893       ++IRef;
894       ++IDestRef;
895     }
896   }
897   return HasAtLeastOneLastprivate;
898 }
899 
900 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
901     const OMPExecutableDirective &D, bool NoFinals,
902     llvm::Value *IsLastIterCond) {
903   if (!HaveInsertPoint())
904     return;
905   // Emit following code:
906   // if (<IsLastIterCond>) {
907   //   orig_var1 = private_orig_var1;
908   //   ...
909   //   orig_varn = private_orig_varn;
910   // }
911   llvm::BasicBlock *ThenBB = nullptr;
912   llvm::BasicBlock *DoneBB = nullptr;
913   if (IsLastIterCond) {
914     ThenBB = createBasicBlock(".omp.lastprivate.then");
915     DoneBB = createBasicBlock(".omp.lastprivate.done");
916     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
917     EmitBlock(ThenBB);
918   }
919   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
920   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
921   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
922     auto IC = LoopDirective->counters().begin();
923     for (auto F : LoopDirective->finals()) {
924       auto *D =
925           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
926       if (NoFinals)
927         AlreadyEmittedVars.insert(D);
928       else
929         LoopCountersAndUpdates[D] = F;
930       ++IC;
931     }
932   }
933   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
934     auto IRef = C->varlist_begin();
935     auto ISrcRef = C->source_exprs().begin();
936     auto IDestRef = C->destination_exprs().begin();
937     for (auto *AssignOp : C->assignment_ops()) {
938       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
939       QualType Type = PrivateVD->getType();
940       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
941       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
942         // If lastprivate variable is a loop control variable for loop-based
943         // directive, update its value before copyin back to original
944         // variable.
945         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
946           EmitIgnoredExpr(FinalExpr);
947         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
948         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
949         // Get the address of the original variable.
950         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
951         // Get the address of the private variable.
952         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
953         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
954           PrivateAddr =
955               Address(Builder.CreateLoad(PrivateAddr),
956                       getNaturalTypeAlignment(RefTy->getPointeeType()));
957         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
958       }
959       ++IRef;
960       ++ISrcRef;
961       ++IDestRef;
962     }
963     if (auto *PostUpdate = C->getPostUpdateExpr())
964       EmitIgnoredExpr(PostUpdate);
965   }
966   if (IsLastIterCond)
967     EmitBlock(DoneBB, /*IsFinished=*/true);
968 }
969 
970 void CodeGenFunction::EmitOMPReductionClauseInit(
971     const OMPExecutableDirective &D,
972     CodeGenFunction::OMPPrivateScope &PrivateScope) {
973   if (!HaveInsertPoint())
974     return;
975   SmallVector<const Expr *, 4> Shareds;
976   SmallVector<const Expr *, 4> Privates;
977   SmallVector<const Expr *, 4> ReductionOps;
978   SmallVector<const Expr *, 4> LHSs;
979   SmallVector<const Expr *, 4> RHSs;
980   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
981     auto IPriv = C->privates().begin();
982     auto IRed = C->reduction_ops().begin();
983     auto ILHS = C->lhs_exprs().begin();
984     auto IRHS = C->rhs_exprs().begin();
985     for (const auto *Ref : C->varlists()) {
986       Shareds.emplace_back(Ref);
987       Privates.emplace_back(*IPriv);
988       ReductionOps.emplace_back(*IRed);
989       LHSs.emplace_back(*ILHS);
990       RHSs.emplace_back(*IRHS);
991       std::advance(IPriv, 1);
992       std::advance(IRed, 1);
993       std::advance(ILHS, 1);
994       std::advance(IRHS, 1);
995     }
996   }
997   ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
998   unsigned Count = 0;
999   auto ILHS = LHSs.begin();
1000   auto IRHS = RHSs.begin();
1001   auto IPriv = Privates.begin();
1002   for (const auto *IRef : Shareds) {
1003     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1004     // Emit private VarDecl with reduction init.
1005     RedCG.emitSharedLValue(*this, Count);
1006     RedCG.emitAggregateType(*this, Count);
1007     auto Emission = EmitAutoVarAlloca(*PrivateVD);
1008     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1009                              RedCG.getSharedLValue(Count),
1010                              [&Emission](CodeGenFunction &CGF) {
1011                                CGF.EmitAutoVarInit(Emission);
1012                                return true;
1013                              });
1014     EmitAutoVarCleanups(Emission);
1015     Address BaseAddr = RedCG.adjustPrivateAddress(
1016         *this, Count, Emission.getAllocatedAddress());
1017     bool IsRegistered = PrivateScope.addPrivate(
1018         RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
1019     assert(IsRegistered && "private var already registered as private");
1020     // Silence the warning about unused variable.
1021     (void)IsRegistered;
1022 
1023     auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1024     auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1025     QualType Type = PrivateVD->getType();
1026     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1027     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1028       // Store the address of the original variable associated with the LHS
1029       // implicit variable.
1030       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1031         return RedCG.getSharedLValue(Count).getAddress();
1032       });
1033       PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1034         return GetAddrOfLocalVar(PrivateVD);
1035       });
1036     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1037                isa<ArraySubscriptExpr>(IRef)) {
1038       // Store the address of the original variable associated with the LHS
1039       // implicit variable.
1040       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1041         return RedCG.getSharedLValue(Count).getAddress();
1042       });
1043       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1044         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1045                                             ConvertTypeForMem(RHSVD->getType()),
1046                                             "rhs.begin");
1047       });
1048     } else {
1049       QualType Type = PrivateVD->getType();
1050       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1051       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1052       // Store the address of the original variable associated with the LHS
1053       // implicit variable.
1054       if (IsArray) {
1055         OriginalAddr = Builder.CreateElementBitCast(
1056             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1057       }
1058       PrivateScope.addPrivate(
1059           LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
1060       PrivateScope.addPrivate(
1061           RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
1062             return IsArray
1063                        ? Builder.CreateElementBitCast(
1064                              GetAddrOfLocalVar(PrivateVD),
1065                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1066                        : GetAddrOfLocalVar(PrivateVD);
1067           });
1068     }
1069     ++ILHS;
1070     ++IRHS;
1071     ++IPriv;
1072     ++Count;
1073   }
1074 }
1075 
1076 void CodeGenFunction::EmitOMPReductionClauseFinal(
1077     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1078   if (!HaveInsertPoint())
1079     return;
1080   llvm::SmallVector<const Expr *, 8> Privates;
1081   llvm::SmallVector<const Expr *, 8> LHSExprs;
1082   llvm::SmallVector<const Expr *, 8> RHSExprs;
1083   llvm::SmallVector<const Expr *, 8> ReductionOps;
1084   bool HasAtLeastOneReduction = false;
1085   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1086     HasAtLeastOneReduction = true;
1087     Privates.append(C->privates().begin(), C->privates().end());
1088     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1089     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1090     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1091   }
1092   if (HasAtLeastOneReduction) {
1093     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1094                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1095                       D.getDirectiveKind() == OMPD_simd;
1096     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1097     // Emit nowait reduction if nowait clause is present or directive is a
1098     // parallel directive (it always has implicit barrier).
1099     CGM.getOpenMPRuntime().emitReduction(
1100         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1101         {WithNowait, SimpleReduction, ReductionKind});
1102   }
1103 }
1104 
1105 static void emitPostUpdateForReductionClause(
1106     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1107     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1108   if (!CGF.HaveInsertPoint())
1109     return;
1110   llvm::BasicBlock *DoneBB = nullptr;
1111   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1112     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1113       if (!DoneBB) {
1114         if (auto *Cond = CondGen(CGF)) {
1115           // If the first post-update expression is found, emit conditional
1116           // block if it was requested.
1117           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1118           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1119           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1120           CGF.EmitBlock(ThenBB);
1121         }
1122       }
1123       CGF.EmitIgnoredExpr(PostUpdate);
1124     }
1125   }
1126   if (DoneBB)
1127     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1128 }
1129 
1130 namespace {
1131 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1132 /// parallel function. This is necessary for combined constructs such as
1133 /// 'distribute parallel for'
1134 typedef llvm::function_ref<void(CodeGenFunction &,
1135                                 const OMPExecutableDirective &,
1136                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1137     CodeGenBoundParametersTy;
1138 } // anonymous namespace
1139 
1140 static void emitCommonOMPParallelDirective(
1141     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1142     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1143     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1144   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1145   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1146       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1147   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1148     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1149     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1150                                          /*IgnoreResultAssign*/ true);
1151     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1152         CGF, NumThreads, NumThreadsClause->getLocStart());
1153   }
1154   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1155     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1156     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1157         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1158   }
1159   const Expr *IfCond = nullptr;
1160   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1161     if (C->getNameModifier() == OMPD_unknown ||
1162         C->getNameModifier() == OMPD_parallel) {
1163       IfCond = C->getCondition();
1164       break;
1165     }
1166   }
1167 
1168   OMPParallelScope Scope(CGF, S);
1169   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1170   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1171   // lower and upper bounds with the pragma 'for' chunking mechanism.
1172   // The following lambda takes care of appending the lower and upper bound
1173   // parameters when necessary
1174   CodeGenBoundParameters(CGF, S, CapturedVars);
1175   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1176   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1177                                               CapturedVars, IfCond);
1178 }
1179 
1180 static void emitEmptyBoundParameters(CodeGenFunction &,
1181                                      const OMPExecutableDirective &,
1182                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1183 
1184 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1185   // Emit parallel region as a standalone region.
1186   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1187     OMPPrivateScope PrivateScope(CGF);
1188     bool Copyins = CGF.EmitOMPCopyinClause(S);
1189     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1190     if (Copyins) {
1191       // Emit implicit barrier to synchronize threads and avoid data races on
1192       // propagation master's thread values of threadprivate variables to local
1193       // instances of that variables of all other implicit threads.
1194       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1195           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1196           /*ForceSimpleCall=*/true);
1197     }
1198     CGF.EmitOMPPrivateClause(S, PrivateScope);
1199     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1200     (void)PrivateScope.Privatize();
1201     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1202     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1203   };
1204   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1205                                  emitEmptyBoundParameters);
1206   emitPostUpdateForReductionClause(
1207       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1208 }
1209 
1210 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1211                                       JumpDest LoopExit) {
1212   RunCleanupsScope BodyScope(*this);
1213   // Update counters values on current iteration.
1214   for (auto I : D.updates()) {
1215     EmitIgnoredExpr(I);
1216   }
1217   // Update the linear variables.
1218   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1219     for (auto *U : C->updates())
1220       EmitIgnoredExpr(U);
1221   }
1222 
1223   // On a continue in the body, jump to the end.
1224   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1225   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1226   // Emit loop body.
1227   EmitStmt(D.getBody());
1228   // The end (updates/cleanups).
1229   EmitBlock(Continue.getBlock());
1230   BreakContinueStack.pop_back();
1231 }
1232 
1233 void CodeGenFunction::EmitOMPInnerLoop(
1234     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1235     const Expr *IncExpr,
1236     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1237     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1238   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1239 
1240   // Start the loop with a block that tests the condition.
1241   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1242   EmitBlock(CondBlock);
1243   const SourceRange &R = S.getSourceRange();
1244   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1245                  SourceLocToDebugLoc(R.getEnd()));
1246 
1247   // If there are any cleanups between here and the loop-exit scope,
1248   // create a block to stage a loop exit along.
1249   auto ExitBlock = LoopExit.getBlock();
1250   if (RequiresCleanup)
1251     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1252 
1253   auto LoopBody = createBasicBlock("omp.inner.for.body");
1254 
1255   // Emit condition.
1256   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1257   if (ExitBlock != LoopExit.getBlock()) {
1258     EmitBlock(ExitBlock);
1259     EmitBranchThroughCleanup(LoopExit);
1260   }
1261 
1262   EmitBlock(LoopBody);
1263   incrementProfileCounter(&S);
1264 
1265   // Create a block for the increment.
1266   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1267   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1268 
1269   BodyGen(*this);
1270 
1271   // Emit "IV = IV + 1" and a back-edge to the condition block.
1272   EmitBlock(Continue.getBlock());
1273   EmitIgnoredExpr(IncExpr);
1274   PostIncGen(*this);
1275   BreakContinueStack.pop_back();
1276   EmitBranch(CondBlock);
1277   LoopStack.pop();
1278   // Emit the fall-through block.
1279   EmitBlock(LoopExit.getBlock());
1280 }
1281 
1282 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1283   if (!HaveInsertPoint())
1284     return false;
1285   // Emit inits for the linear variables.
1286   bool HasLinears = false;
1287   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1288     for (auto *Init : C->inits()) {
1289       HasLinears = true;
1290       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1291       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1292         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1293         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1294         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1295                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1296                         VD->getInit()->getType(), VK_LValue,
1297                         VD->getInit()->getExprLoc());
1298         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1299                                                 VD->getType()),
1300                        /*capturedByInit=*/false);
1301         EmitAutoVarCleanups(Emission);
1302       } else
1303         EmitVarDecl(*VD);
1304     }
1305     // Emit the linear steps for the linear clauses.
1306     // If a step is not constant, it is pre-calculated before the loop.
1307     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1308       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1309         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1310         // Emit calculation of the linear step.
1311         EmitIgnoredExpr(CS);
1312       }
1313   }
1314   return HasLinears;
1315 }
1316 
1317 void CodeGenFunction::EmitOMPLinearClauseFinal(
1318     const OMPLoopDirective &D,
1319     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1320   if (!HaveInsertPoint())
1321     return;
1322   llvm::BasicBlock *DoneBB = nullptr;
1323   // Emit the final values of the linear variables.
1324   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1325     auto IC = C->varlist_begin();
1326     for (auto *F : C->finals()) {
1327       if (!DoneBB) {
1328         if (auto *Cond = CondGen(*this)) {
1329           // If the first post-update expression is found, emit conditional
1330           // block if it was requested.
1331           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1332           DoneBB = createBasicBlock(".omp.linear.pu.done");
1333           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1334           EmitBlock(ThenBB);
1335         }
1336       }
1337       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1338       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1339                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1340                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1341       Address OrigAddr = EmitLValue(&DRE).getAddress();
1342       CodeGenFunction::OMPPrivateScope VarScope(*this);
1343       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1344       (void)VarScope.Privatize();
1345       EmitIgnoredExpr(F);
1346       ++IC;
1347     }
1348     if (auto *PostUpdate = C->getPostUpdateExpr())
1349       EmitIgnoredExpr(PostUpdate);
1350   }
1351   if (DoneBB)
1352     EmitBlock(DoneBB, /*IsFinished=*/true);
1353 }
1354 
1355 static void emitAlignedClause(CodeGenFunction &CGF,
1356                               const OMPExecutableDirective &D) {
1357   if (!CGF.HaveInsertPoint())
1358     return;
1359   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1360     unsigned ClauseAlignment = 0;
1361     if (auto AlignmentExpr = Clause->getAlignment()) {
1362       auto AlignmentCI =
1363           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1364       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1365     }
1366     for (auto E : Clause->varlists()) {
1367       unsigned Alignment = ClauseAlignment;
1368       if (Alignment == 0) {
1369         // OpenMP [2.8.1, Description]
1370         // If no optional parameter is specified, implementation-defined default
1371         // alignments for SIMD instructions on the target platforms are assumed.
1372         Alignment =
1373             CGF.getContext()
1374                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1375                     E->getType()->getPointeeType()))
1376                 .getQuantity();
1377       }
1378       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1379              "alignment is not power of 2");
1380       if (Alignment != 0) {
1381         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1382         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1383       }
1384     }
1385   }
1386 }
1387 
1388 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1389     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1390   if (!HaveInsertPoint())
1391     return;
1392   auto I = S.private_counters().begin();
1393   for (auto *E : S.counters()) {
1394     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1395     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1396     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1397       // Emit var without initialization.
1398       if (!LocalDeclMap.count(PrivateVD)) {
1399         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1400         EmitAutoVarCleanups(VarEmission);
1401       }
1402       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1403                       /*RefersToEnclosingVariableOrCapture=*/false,
1404                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1405       return EmitLValue(&DRE).getAddress();
1406     });
1407     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1408         VD->hasGlobalStorage()) {
1409       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1410         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1411                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1412                         E->getType(), VK_LValue, E->getExprLoc());
1413         return EmitLValue(&DRE).getAddress();
1414       });
1415     }
1416     ++I;
1417   }
1418 }
1419 
1420 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1421                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1422                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1423   if (!CGF.HaveInsertPoint())
1424     return;
1425   {
1426     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1427     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1428     (void)PreCondScope.Privatize();
1429     // Get initial values of real counters.
1430     for (auto I : S.inits()) {
1431       CGF.EmitIgnoredExpr(I);
1432     }
1433   }
1434   // Check that loop is executed at least one time.
1435   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1436 }
1437 
1438 void CodeGenFunction::EmitOMPLinearClause(
1439     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1440   if (!HaveInsertPoint())
1441     return;
1442   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1443   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1444     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1445     for (auto *C : LoopDirective->counters()) {
1446       SIMDLCVs.insert(
1447           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1448     }
1449   }
1450   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1451     auto CurPrivate = C->privates().begin();
1452     for (auto *E : C->varlists()) {
1453       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1454       auto *PrivateVD =
1455           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1456       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1457         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1458           // Emit private VarDecl with copy init.
1459           EmitVarDecl(*PrivateVD);
1460           return GetAddrOfLocalVar(PrivateVD);
1461         });
1462         assert(IsRegistered && "linear var already registered as private");
1463         // Silence the warning about unused variable.
1464         (void)IsRegistered;
1465       } else
1466         EmitVarDecl(*PrivateVD);
1467       ++CurPrivate;
1468     }
1469   }
1470 }
1471 
1472 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1473                                      const OMPExecutableDirective &D,
1474                                      bool IsMonotonic) {
1475   if (!CGF.HaveInsertPoint())
1476     return;
1477   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1478     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1479                                  /*ignoreResult=*/true);
1480     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1481     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1482     // In presence of finite 'safelen', it may be unsafe to mark all
1483     // the memory instructions parallel, because loop-carried
1484     // dependences of 'safelen' iterations are possible.
1485     if (!IsMonotonic)
1486       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1487   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1488     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1489                                  /*ignoreResult=*/true);
1490     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1491     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1492     // In presence of finite 'safelen', it may be unsafe to mark all
1493     // the memory instructions parallel, because loop-carried
1494     // dependences of 'safelen' iterations are possible.
1495     CGF.LoopStack.setParallel(false);
1496   }
1497 }
1498 
1499 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1500                                       bool IsMonotonic) {
1501   // Walk clauses and process safelen/lastprivate.
1502   LoopStack.setParallel(!IsMonotonic);
1503   LoopStack.setVectorizeEnable(true);
1504   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1505 }
1506 
1507 void CodeGenFunction::EmitOMPSimdFinal(
1508     const OMPLoopDirective &D,
1509     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1510   if (!HaveInsertPoint())
1511     return;
1512   llvm::BasicBlock *DoneBB = nullptr;
1513   auto IC = D.counters().begin();
1514   auto IPC = D.private_counters().begin();
1515   for (auto F : D.finals()) {
1516     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1517     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1518     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1519     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1520         OrigVD->hasGlobalStorage() || CED) {
1521       if (!DoneBB) {
1522         if (auto *Cond = CondGen(*this)) {
1523           // If the first post-update expression is found, emit conditional
1524           // block if it was requested.
1525           auto *ThenBB = createBasicBlock(".omp.final.then");
1526           DoneBB = createBasicBlock(".omp.final.done");
1527           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1528           EmitBlock(ThenBB);
1529         }
1530       }
1531       Address OrigAddr = Address::invalid();
1532       if (CED)
1533         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1534       else {
1535         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1536                         /*RefersToEnclosingVariableOrCapture=*/false,
1537                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1538         OrigAddr = EmitLValue(&DRE).getAddress();
1539       }
1540       OMPPrivateScope VarScope(*this);
1541       VarScope.addPrivate(OrigVD,
1542                           [OrigAddr]() -> Address { return OrigAddr; });
1543       (void)VarScope.Privatize();
1544       EmitIgnoredExpr(F);
1545     }
1546     ++IC;
1547     ++IPC;
1548   }
1549   if (DoneBB)
1550     EmitBlock(DoneBB, /*IsFinished=*/true);
1551 }
1552 
1553 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1554                                          const OMPLoopDirective &S,
1555                                          CodeGenFunction::JumpDest LoopExit) {
1556   CGF.EmitOMPLoopBody(S, LoopExit);
1557   CGF.EmitStopPoint(&S);
1558 }
1559 
1560 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
1561                               PrePostActionTy &Action) {
1562   Action.Enter(CGF);
1563   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
1564          "Expected simd directive");
1565   OMPLoopScope PreInitScope(CGF, S);
1566   // if (PreCond) {
1567   //   for (IV in 0..LastIteration) BODY;
1568   //   <Final counter/linear vars updates>;
1569   // }
1570   //
1571 
1572   // Emit: if (PreCond) - begin.
1573   // If the condition constant folds and can be elided, avoid emitting the
1574   // whole loop.
1575   bool CondConstant;
1576   llvm::BasicBlock *ContBlock = nullptr;
1577   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1578     if (!CondConstant)
1579       return;
1580   } else {
1581     auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1582     ContBlock = CGF.createBasicBlock("simd.if.end");
1583     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1584                 CGF.getProfileCount(&S));
1585     CGF.EmitBlock(ThenBlock);
1586     CGF.incrementProfileCounter(&S);
1587   }
1588 
1589   // Emit the loop iteration variable.
1590   const Expr *IVExpr = S.getIterationVariable();
1591   const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1592   CGF.EmitVarDecl(*IVDecl);
1593   CGF.EmitIgnoredExpr(S.getInit());
1594 
1595   // Emit the iterations count variable.
1596   // If it is not a variable, Sema decided to calculate iterations count on
1597   // each iteration (e.g., it is foldable into a constant).
1598   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1599     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1600     // Emit calculation of the iterations count.
1601     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1602   }
1603 
1604   CGF.EmitOMPSimdInit(S);
1605 
1606   emitAlignedClause(CGF, S);
1607   (void)CGF.EmitOMPLinearClauseInit(S);
1608   {
1609     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
1610     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1611     CGF.EmitOMPLinearClause(S, LoopScope);
1612     CGF.EmitOMPPrivateClause(S, LoopScope);
1613     CGF.EmitOMPReductionClauseInit(S, LoopScope);
1614     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1615     (void)LoopScope.Privatize();
1616     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1617                          S.getInc(),
1618                          [&S](CodeGenFunction &CGF) {
1619                            CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
1620                            CGF.EmitStopPoint(&S);
1621                          },
1622                          [](CodeGenFunction &) {});
1623     CGF.EmitOMPSimdFinal(
1624         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1625     // Emit final copy of the lastprivate variables at the end of loops.
1626     if (HasLastprivateClause)
1627       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1628     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1629     emitPostUpdateForReductionClause(
1630         CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1631   }
1632   CGF.EmitOMPLinearClauseFinal(
1633       S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1634   // Emit: if (PreCond) - end.
1635   if (ContBlock) {
1636     CGF.EmitBranch(ContBlock);
1637     CGF.EmitBlock(ContBlock, true);
1638   }
1639 }
1640 
1641 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1642   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1643     emitOMPSimdRegion(CGF, S, Action);
1644   };
1645   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1646   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1647 }
1648 
1649 void CodeGenFunction::EmitOMPOuterLoop(
1650     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1651     CodeGenFunction::OMPPrivateScope &LoopScope,
1652     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1653     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1654     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1655   auto &RT = CGM.getOpenMPRuntime();
1656 
1657   const Expr *IVExpr = S.getIterationVariable();
1658   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1659   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1660 
1661   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1662 
1663   // Start the loop with a block that tests the condition.
1664   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1665   EmitBlock(CondBlock);
1666   const SourceRange &R = S.getSourceRange();
1667   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1668                  SourceLocToDebugLoc(R.getEnd()));
1669 
1670   llvm::Value *BoolCondVal = nullptr;
1671   if (!DynamicOrOrdered) {
1672     // UB = min(UB, GlobalUB) or
1673     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1674     // 'distribute parallel for')
1675     EmitIgnoredExpr(LoopArgs.EUB);
1676     // IV = LB
1677     EmitIgnoredExpr(LoopArgs.Init);
1678     // IV < UB
1679     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1680   } else {
1681     BoolCondVal =
1682         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1683                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1684   }
1685 
1686   // If there are any cleanups between here and the loop-exit scope,
1687   // create a block to stage a loop exit along.
1688   auto ExitBlock = LoopExit.getBlock();
1689   if (LoopScope.requiresCleanups())
1690     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1691 
1692   auto LoopBody = createBasicBlock("omp.dispatch.body");
1693   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1694   if (ExitBlock != LoopExit.getBlock()) {
1695     EmitBlock(ExitBlock);
1696     EmitBranchThroughCleanup(LoopExit);
1697   }
1698   EmitBlock(LoopBody);
1699 
1700   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1701   // LB for loop condition and emitted it above).
1702   if (DynamicOrOrdered)
1703     EmitIgnoredExpr(LoopArgs.Init);
1704 
1705   // Create a block for the increment.
1706   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1707   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1708 
1709   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1710   // with dynamic/guided scheduling and without ordered clause.
1711   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1712     LoopStack.setParallel(!IsMonotonic);
1713   else
1714     EmitOMPSimdInit(S, IsMonotonic);
1715 
1716   SourceLocation Loc = S.getLocStart();
1717 
1718   // when 'distribute' is not combined with a 'for':
1719   // while (idx <= UB) { BODY; ++idx; }
1720   // when 'distribute' is combined with a 'for'
1721   // (e.g. 'distribute parallel for')
1722   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1723   EmitOMPInnerLoop(
1724       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1725       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1726         CodeGenLoop(CGF, S, LoopExit);
1727       },
1728       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1729         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1730       });
1731 
1732   EmitBlock(Continue.getBlock());
1733   BreakContinueStack.pop_back();
1734   if (!DynamicOrOrdered) {
1735     // Emit "LB = LB + Stride", "UB = UB + Stride".
1736     EmitIgnoredExpr(LoopArgs.NextLB);
1737     EmitIgnoredExpr(LoopArgs.NextUB);
1738   }
1739 
1740   EmitBranch(CondBlock);
1741   LoopStack.pop();
1742   // Emit the fall-through block.
1743   EmitBlock(LoopExit.getBlock());
1744 
1745   // Tell the runtime we are done.
1746   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1747     if (!DynamicOrOrdered)
1748       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1749                                                      S.getDirectiveKind());
1750   };
1751   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1752 }
1753 
1754 void CodeGenFunction::EmitOMPForOuterLoop(
1755     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1756     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1757     const OMPLoopArguments &LoopArgs,
1758     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1759   auto &RT = CGM.getOpenMPRuntime();
1760 
1761   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1762   const bool DynamicOrOrdered =
1763       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1764 
1765   assert((Ordered ||
1766           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1767                                  LoopArgs.Chunk != nullptr)) &&
1768          "static non-chunked schedule does not need outer loop");
1769 
1770   // Emit outer loop.
1771   //
1772   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1773   // When schedule(dynamic,chunk_size) is specified, the iterations are
1774   // distributed to threads in the team in chunks as the threads request them.
1775   // Each thread executes a chunk of iterations, then requests another chunk,
1776   // until no chunks remain to be distributed. Each chunk contains chunk_size
1777   // iterations, except for the last chunk to be distributed, which may have
1778   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1779   //
1780   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1781   // to threads in the team in chunks as the executing threads request them.
1782   // Each thread executes a chunk of iterations, then requests another chunk,
1783   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1784   // each chunk is proportional to the number of unassigned iterations divided
1785   // by the number of threads in the team, decreasing to 1. For a chunk_size
1786   // with value k (greater than 1), the size of each chunk is determined in the
1787   // same way, with the restriction that the chunks do not contain fewer than k
1788   // iterations (except for the last chunk to be assigned, which may have fewer
1789   // than k iterations).
1790   //
1791   // When schedule(auto) is specified, the decision regarding scheduling is
1792   // delegated to the compiler and/or runtime system. The programmer gives the
1793   // implementation the freedom to choose any possible mapping of iterations to
1794   // threads in the team.
1795   //
1796   // When schedule(runtime) is specified, the decision regarding scheduling is
1797   // deferred until run time, and the schedule and chunk size are taken from the
1798   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1799   // implementation defined
1800   //
1801   // while(__kmpc_dispatch_next(&LB, &UB)) {
1802   //   idx = LB;
1803   //   while (idx <= UB) { BODY; ++idx;
1804   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1805   //   } // inner loop
1806   // }
1807   //
1808   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1809   // When schedule(static, chunk_size) is specified, iterations are divided into
1810   // chunks of size chunk_size, and the chunks are assigned to the threads in
1811   // the team in a round-robin fashion in the order of the thread number.
1812   //
1813   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1814   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1815   //   LB = LB + ST;
1816   //   UB = UB + ST;
1817   // }
1818   //
1819 
1820   const Expr *IVExpr = S.getIterationVariable();
1821   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1822   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1823 
1824   if (DynamicOrOrdered) {
1825     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1826     llvm::Value *LBVal = DispatchBounds.first;
1827     llvm::Value *UBVal = DispatchBounds.second;
1828     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1829                                                              LoopArgs.Chunk};
1830     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1831                            IVSigned, Ordered, DipatchRTInputValues);
1832   } else {
1833     CGOpenMPRuntime::StaticRTInput StaticInit(
1834         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1835         LoopArgs.ST, LoopArgs.Chunk);
1836     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1837                          ScheduleKind, StaticInit);
1838   }
1839 
1840   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1841                                     const unsigned IVSize,
1842                                     const bool IVSigned) {
1843     if (Ordered) {
1844       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1845                                                             IVSigned);
1846     }
1847   };
1848 
1849   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1850                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1851   OuterLoopArgs.IncExpr = S.getInc();
1852   OuterLoopArgs.Init = S.getInit();
1853   OuterLoopArgs.Cond = S.getCond();
1854   OuterLoopArgs.NextLB = S.getNextLowerBound();
1855   OuterLoopArgs.NextUB = S.getNextUpperBound();
1856   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1857                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1858 }
1859 
1860 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1861                              const unsigned IVSize, const bool IVSigned) {}
1862 
1863 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1864     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1865     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1866     const CodeGenLoopTy &CodeGenLoopContent) {
1867 
1868   auto &RT = CGM.getOpenMPRuntime();
1869 
1870   // Emit outer loop.
1871   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1872   // dynamic
1873   //
1874 
1875   const Expr *IVExpr = S.getIterationVariable();
1876   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1877   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1878 
1879   CGOpenMPRuntime::StaticRTInput StaticInit(
1880       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1881       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1882   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1883 
1884   // for combined 'distribute' and 'for' the increment expression of distribute
1885   // is store in DistInc. For 'distribute' alone, it is in Inc.
1886   Expr *IncExpr;
1887   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1888     IncExpr = S.getDistInc();
1889   else
1890     IncExpr = S.getInc();
1891 
1892   // this routine is shared by 'omp distribute parallel for' and
1893   // 'omp distribute': select the right EUB expression depending on the
1894   // directive
1895   OMPLoopArguments OuterLoopArgs;
1896   OuterLoopArgs.LB = LoopArgs.LB;
1897   OuterLoopArgs.UB = LoopArgs.UB;
1898   OuterLoopArgs.ST = LoopArgs.ST;
1899   OuterLoopArgs.IL = LoopArgs.IL;
1900   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1901   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1902                           ? S.getCombinedEnsureUpperBound()
1903                           : S.getEnsureUpperBound();
1904   OuterLoopArgs.IncExpr = IncExpr;
1905   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1906                            ? S.getCombinedInit()
1907                            : S.getInit();
1908   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1909                            ? S.getCombinedCond()
1910                            : S.getCond();
1911   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1912                              ? S.getCombinedNextLowerBound()
1913                              : S.getNextLowerBound();
1914   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1915                              ? S.getCombinedNextUpperBound()
1916                              : S.getNextUpperBound();
1917 
1918   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1919                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1920                    emitEmptyOrdered);
1921 }
1922 
1923 /// Emit a helper variable and return corresponding lvalue.
1924 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1925                                const DeclRefExpr *Helper) {
1926   auto VDecl = cast<VarDecl>(Helper->getDecl());
1927   CGF.EmitVarDecl(*VDecl);
1928   return CGF.EmitLValue(Helper);
1929 }
1930 
1931 static std::pair<LValue, LValue>
1932 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1933                                      const OMPExecutableDirective &S) {
1934   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1935   LValue LB =
1936       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1937   LValue UB =
1938       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1939 
1940   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1941   // parallel for') we need to use the 'distribute'
1942   // chunk lower and upper bounds rather than the whole loop iteration
1943   // space. These are parameters to the outlined function for 'parallel'
1944   // and we copy the bounds of the previous schedule into the
1945   // the current ones.
1946   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1947   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1948   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1949   PrevLBVal = CGF.EmitScalarConversion(
1950       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1951       LS.getIterationVariable()->getType(), SourceLocation());
1952   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1953   PrevUBVal = CGF.EmitScalarConversion(
1954       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1955       LS.getIterationVariable()->getType(), SourceLocation());
1956 
1957   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1958   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1959 
1960   return {LB, UB};
1961 }
1962 
1963 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1964 /// we need to use the LB and UB expressions generated by the worksharing
1965 /// code generation support, whereas in non combined situations we would
1966 /// just emit 0 and the LastIteration expression
1967 /// This function is necessary due to the difference of the LB and UB
1968 /// types for the RT emission routines for 'for_static_init' and
1969 /// 'for_dispatch_init'
1970 static std::pair<llvm::Value *, llvm::Value *>
1971 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1972                                         const OMPExecutableDirective &S,
1973                                         Address LB, Address UB) {
1974   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1975   const Expr *IVExpr = LS.getIterationVariable();
1976   // when implementing a dynamic schedule for a 'for' combined with a
1977   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1978   // is not normalized as each team only executes its own assigned
1979   // distribute chunk
1980   QualType IteratorTy = IVExpr->getType();
1981   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1982                                             SourceLocation());
1983   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1984                                             SourceLocation());
1985   return {LBVal, UBVal};
1986 }
1987 
1988 static void emitDistributeParallelForDistributeInnerBoundParams(
1989     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1990     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1991   const auto &Dir = cast<OMPLoopDirective>(S);
1992   LValue LB =
1993       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1994   auto LBCast = CGF.Builder.CreateIntCast(
1995       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1996   CapturedVars.push_back(LBCast);
1997   LValue UB =
1998       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1999 
2000   auto UBCast = CGF.Builder.CreateIntCast(
2001       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2002   CapturedVars.push_back(UBCast);
2003 }
2004 
2005 static void
2006 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2007                                  const OMPLoopDirective &S,
2008                                  CodeGenFunction::JumpDest LoopExit) {
2009   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2010                                          PrePostActionTy &) {
2011     bool HasCancel = false;
2012     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2013       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2014         HasCancel = D->hasCancel();
2015       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2016         HasCancel = D->hasCancel();
2017       else if (const auto *D =
2018                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2019         HasCancel = D->hasCancel();
2020     }
2021     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2022                                                      HasCancel);
2023     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2024                                emitDistributeParallelForInnerBounds,
2025                                emitDistributeParallelForDispatchBounds);
2026   };
2027 
2028   emitCommonOMPParallelDirective(
2029       CGF, S,
2030       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2031       CGInlinedWorksharingLoop,
2032       emitDistributeParallelForDistributeInnerBoundParams);
2033 }
2034 
2035 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2036     const OMPDistributeParallelForDirective &S) {
2037   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2038     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2039                               S.getDistInc());
2040   };
2041   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2042   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2043 }
2044 
2045 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2046     const OMPDistributeParallelForSimdDirective &S) {
2047   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2048     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2049                               S.getDistInc());
2050   };
2051   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2052   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2053 }
2054 
2055 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2056     const OMPDistributeSimdDirective &S) {
2057   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2058   CGM.getOpenMPRuntime().emitInlinedDirective(
2059       *this, OMPD_distribute_simd,
2060       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2061         OMPLoopScope PreInitScope(CGF, S);
2062         CGF.EmitStmt(
2063             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2064       });
2065 }
2066 
2067 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2068     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2069   // Emit SPMD target parallel for region as a standalone region.
2070   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2071     emitOMPSimdRegion(CGF, S, Action);
2072   };
2073   llvm::Function *Fn;
2074   llvm::Constant *Addr;
2075   // Emit target region as a standalone region.
2076   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2077       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2078   assert(Fn && Addr && "Target device function emission failed.");
2079 }
2080 
2081 void CodeGenFunction::EmitOMPTargetSimdDirective(
2082     const OMPTargetSimdDirective &S) {
2083   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2084     emitOMPSimdRegion(CGF, S, Action);
2085   };
2086   emitCommonOMPTargetDirective(*this, S, CodeGen);
2087 }
2088 
2089 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2090     const OMPTeamsDistributeSimdDirective &S) {
2091   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2092   CGM.getOpenMPRuntime().emitInlinedDirective(
2093       *this, OMPD_teams_distribute_simd,
2094       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2095         OMPLoopScope PreInitScope(CGF, S);
2096         CGF.EmitStmt(
2097             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2098       });
2099 }
2100 
2101 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2102     const OMPTeamsDistributeParallelForSimdDirective &S) {
2103   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2104   CGM.getOpenMPRuntime().emitInlinedDirective(
2105       *this, OMPD_teams_distribute_parallel_for_simd,
2106       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2107         OMPLoopScope PreInitScope(CGF, S);
2108         CGF.EmitStmt(
2109             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2110       });
2111 }
2112 
2113 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2114     const OMPTargetTeamsDistributeDirective &S) {
2115   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2116   CGM.getOpenMPRuntime().emitInlinedDirective(
2117       *this, OMPD_target_teams_distribute,
2118       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2119         CGF.EmitStmt(
2120             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2121       });
2122 }
2123 
2124 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2125     const OMPTargetTeamsDistributeParallelForDirective &S) {
2126   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2127   CGM.getOpenMPRuntime().emitInlinedDirective(
2128       *this, OMPD_target_teams_distribute_parallel_for,
2129       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2130         CGF.EmitStmt(
2131             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2132       });
2133 }
2134 
2135 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2136     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2137   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2138   CGM.getOpenMPRuntime().emitInlinedDirective(
2139       *this, OMPD_target_teams_distribute_parallel_for_simd,
2140       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2141         CGF.EmitStmt(
2142             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2143       });
2144 }
2145 
2146 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2147     const OMPTargetTeamsDistributeSimdDirective &S) {
2148   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2149   CGM.getOpenMPRuntime().emitInlinedDirective(
2150       *this, OMPD_target_teams_distribute_simd,
2151       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2152         CGF.EmitStmt(
2153             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2154       });
2155 }
2156 
2157 namespace {
2158   struct ScheduleKindModifiersTy {
2159     OpenMPScheduleClauseKind Kind;
2160     OpenMPScheduleClauseModifier M1;
2161     OpenMPScheduleClauseModifier M2;
2162     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2163                             OpenMPScheduleClauseModifier M1,
2164                             OpenMPScheduleClauseModifier M2)
2165         : Kind(Kind), M1(M1), M2(M2) {}
2166   };
2167 } // namespace
2168 
2169 bool CodeGenFunction::EmitOMPWorksharingLoop(
2170     const OMPLoopDirective &S, Expr *EUB,
2171     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2172     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2173   // Emit the loop iteration variable.
2174   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2175   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2176   EmitVarDecl(*IVDecl);
2177 
2178   // Emit the iterations count variable.
2179   // If it is not a variable, Sema decided to calculate iterations count on each
2180   // iteration (e.g., it is foldable into a constant).
2181   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2182     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2183     // Emit calculation of the iterations count.
2184     EmitIgnoredExpr(S.getCalcLastIteration());
2185   }
2186 
2187   auto &RT = CGM.getOpenMPRuntime();
2188 
2189   bool HasLastprivateClause;
2190   // Check pre-condition.
2191   {
2192     OMPLoopScope PreInitScope(*this, S);
2193     // Skip the entire loop if we don't meet the precondition.
2194     // If the condition constant folds and can be elided, avoid emitting the
2195     // whole loop.
2196     bool CondConstant;
2197     llvm::BasicBlock *ContBlock = nullptr;
2198     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2199       if (!CondConstant)
2200         return false;
2201     } else {
2202       auto *ThenBlock = createBasicBlock("omp.precond.then");
2203       ContBlock = createBasicBlock("omp.precond.end");
2204       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2205                   getProfileCount(&S));
2206       EmitBlock(ThenBlock);
2207       incrementProfileCounter(&S);
2208     }
2209 
2210     bool Ordered = false;
2211     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2212       if (OrderedClause->getNumForLoops())
2213         RT.emitDoacrossInit(*this, S);
2214       else
2215         Ordered = true;
2216     }
2217 
2218     llvm::DenseSet<const Expr *> EmittedFinals;
2219     emitAlignedClause(*this, S);
2220     bool HasLinears = EmitOMPLinearClauseInit(S);
2221     // Emit helper vars inits.
2222 
2223     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2224     LValue LB = Bounds.first;
2225     LValue UB = Bounds.second;
2226     LValue ST =
2227         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2228     LValue IL =
2229         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2230 
2231     // Emit 'then' code.
2232     {
2233       OMPPrivateScope LoopScope(*this);
2234       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2235         // Emit implicit barrier to synchronize threads and avoid data races on
2236         // initialization of firstprivate variables and post-update of
2237         // lastprivate variables.
2238         CGM.getOpenMPRuntime().emitBarrierCall(
2239             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2240             /*ForceSimpleCall=*/true);
2241       }
2242       EmitOMPPrivateClause(S, LoopScope);
2243       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2244       EmitOMPReductionClauseInit(S, LoopScope);
2245       EmitOMPPrivateLoopCounters(S, LoopScope);
2246       EmitOMPLinearClause(S, LoopScope);
2247       (void)LoopScope.Privatize();
2248 
2249       // Detect the loop schedule kind and chunk.
2250       llvm::Value *Chunk = nullptr;
2251       OpenMPScheduleTy ScheduleKind;
2252       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2253         ScheduleKind.Schedule = C->getScheduleKind();
2254         ScheduleKind.M1 = C->getFirstScheduleModifier();
2255         ScheduleKind.M2 = C->getSecondScheduleModifier();
2256         if (const auto *Ch = C->getChunkSize()) {
2257           Chunk = EmitScalarExpr(Ch);
2258           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2259                                        S.getIterationVariable()->getType(),
2260                                        S.getLocStart());
2261         }
2262       }
2263       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2264       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2265       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2266       // If the static schedule kind is specified or if the ordered clause is
2267       // specified, and if no monotonic modifier is specified, the effect will
2268       // be as if the monotonic modifier was specified.
2269       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2270                                 /* Chunked */ Chunk != nullptr) &&
2271           !Ordered) {
2272         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2273           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2274         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2275         // When no chunk_size is specified, the iteration space is divided into
2276         // chunks that are approximately equal in size, and at most one chunk is
2277         // distributed to each thread. Note that the size of the chunks is
2278         // unspecified in this case.
2279         CGOpenMPRuntime::StaticRTInput StaticInit(
2280             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2281             UB.getAddress(), ST.getAddress());
2282         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2283                              ScheduleKind, StaticInit);
2284         auto LoopExit =
2285             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2286         // UB = min(UB, GlobalUB);
2287         EmitIgnoredExpr(S.getEnsureUpperBound());
2288         // IV = LB;
2289         EmitIgnoredExpr(S.getInit());
2290         // while (idx <= UB) { BODY; ++idx; }
2291         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2292                          S.getInc(),
2293                          [&S, LoopExit](CodeGenFunction &CGF) {
2294                            CGF.EmitOMPLoopBody(S, LoopExit);
2295                            CGF.EmitStopPoint(&S);
2296                          },
2297                          [](CodeGenFunction &) {});
2298         EmitBlock(LoopExit.getBlock());
2299         // Tell the runtime we are done.
2300         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2301           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2302                                                          S.getDirectiveKind());
2303         };
2304         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2305       } else {
2306         const bool IsMonotonic =
2307             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2308             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2309             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2310             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2311         // Emit the outer loop, which requests its work chunk [LB..UB] from
2312         // runtime and runs the inner loop to process it.
2313         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2314                                              ST.getAddress(), IL.getAddress(),
2315                                              Chunk, EUB);
2316         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2317                             LoopArguments, CGDispatchBounds);
2318       }
2319       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2320         EmitOMPSimdFinal(S,
2321                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2322                            return CGF.Builder.CreateIsNotNull(
2323                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2324                          });
2325       }
2326       EmitOMPReductionClauseFinal(
2327           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2328                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2329                  : /*Parallel only*/ OMPD_parallel);
2330       // Emit post-update of the reduction variables if IsLastIter != 0.
2331       emitPostUpdateForReductionClause(
2332           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2333             return CGF.Builder.CreateIsNotNull(
2334                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2335           });
2336       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2337       if (HasLastprivateClause)
2338         EmitOMPLastprivateClauseFinal(
2339             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2340             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2341     }
2342     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2343       return CGF.Builder.CreateIsNotNull(
2344           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2345     });
2346     // We're now done with the loop, so jump to the continuation block.
2347     if (ContBlock) {
2348       EmitBranch(ContBlock);
2349       EmitBlock(ContBlock, true);
2350     }
2351   }
2352   return HasLastprivateClause;
2353 }
2354 
2355 /// The following two functions generate expressions for the loop lower
2356 /// and upper bounds in case of static and dynamic (dispatch) schedule
2357 /// of the associated 'for' or 'distribute' loop.
2358 static std::pair<LValue, LValue>
2359 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2360   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2361   LValue LB =
2362       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2363   LValue UB =
2364       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2365   return {LB, UB};
2366 }
2367 
2368 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2369 /// consider the lower and upper bound expressions generated by the
2370 /// worksharing loop support, but we use 0 and the iteration space size as
2371 /// constants
2372 static std::pair<llvm::Value *, llvm::Value *>
2373 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2374                           Address LB, Address UB) {
2375   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2376   const Expr *IVExpr = LS.getIterationVariable();
2377   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2378   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2379   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2380   return {LBVal, UBVal};
2381 }
2382 
2383 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2384   bool HasLastprivates = false;
2385   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2386                                           PrePostActionTy &) {
2387     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2388     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2389                                                  emitForLoopBounds,
2390                                                  emitDispatchForLoopBounds);
2391   };
2392   {
2393     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2394     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2395                                                 S.hasCancel());
2396   }
2397 
2398   // Emit an implicit barrier at the end.
2399   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2400     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2401   }
2402 }
2403 
2404 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2405   bool HasLastprivates = false;
2406   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2407                                           PrePostActionTy &) {
2408     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2409                                                  emitForLoopBounds,
2410                                                  emitDispatchForLoopBounds);
2411   };
2412   {
2413     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2414     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2415   }
2416 
2417   // Emit an implicit barrier at the end.
2418   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2419     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2420   }
2421 }
2422 
2423 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2424                                 const Twine &Name,
2425                                 llvm::Value *Init = nullptr) {
2426   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2427   if (Init)
2428     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2429   return LVal;
2430 }
2431 
/// Emits the sections region of directive \a S (an OMPSectionsDirective or an
/// OMPParallelSectionsDirective; any other directive is treated as having no
/// cancel).
///
/// The associated statement is lowered to a statically scheduled loop over a
/// signed 32-bit section index. The loop body is a switch on that index with
/// one case per child of the captured compound statement, or a single case 0
/// when the captured statement is not a CompoundStmt. Firstprivate, private,
/// lastprivate and reduction clauses of \a S are handled around the loop.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // Null when the captured statement is a single statement rather than a
  // compound statement.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  // Set by the region body below; read after the region has been emitted.
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Index of the last section: number of children minus one, or 0 when
    // there is only the single non-compound statement.
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions bound to IV and UB so that the synthesized condition
    // and increment expressions below can refer to them.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      // The exit block doubles as the switch's default destination.
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single statement: it becomes the only case, with index 0.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' directives carry a cancel flag.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads after the final copy of
    // the lastprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2571 
2572 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2573   {
2574     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2575     EmitSections(S);
2576   }
2577   // Emit an implicit barrier at the end.
2578   if (!S.getSingleClause<OMPNowaitClause>()) {
2579     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2580                                            OMPD_sections);
2581   }
2582 }
2583 
2584 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2585   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2586     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2587   };
2588   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2589   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2590                                               S.hasCancel());
2591 }
2592 
2593 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2594   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2595   llvm::SmallVector<const Expr *, 8> DestExprs;
2596   llvm::SmallVector<const Expr *, 8> SrcExprs;
2597   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2598   // Check if there are any 'copyprivate' clauses associated with this
2599   // 'single' construct.
2600   // Build a list of copyprivate variables along with helper expressions
2601   // (<source>, <destination>, <destination>=<source> expressions)
2602   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2603     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2604     DestExprs.append(C->destination_exprs().begin(),
2605                      C->destination_exprs().end());
2606     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2607     AssignmentOps.append(C->assignment_ops().begin(),
2608                          C->assignment_ops().end());
2609   }
2610   // Emit code for 'single' region along with 'copyprivate' clauses
2611   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2612     Action.Enter(CGF);
2613     OMPPrivateScope SingleScope(CGF);
2614     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2615     CGF.EmitOMPPrivateClause(S, SingleScope);
2616     (void)SingleScope.Privatize();
2617     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2618   };
2619   {
2620     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2621     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2622                                             CopyprivateVars, DestExprs,
2623                                             SrcExprs, AssignmentOps);
2624   }
2625   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2626   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2627   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2628     CGM.getOpenMPRuntime().emitBarrierCall(
2629         *this, S.getLocStart(),
2630         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2631   }
2632 }
2633 
2634 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2635   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2636     Action.Enter(CGF);
2637     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2638   };
2639   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2640   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2641 }
2642 
2643 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2644   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2645     Action.Enter(CGF);
2646     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2647   };
2648   Expr *Hint = nullptr;
2649   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2650     Hint = HintClause->getHint();
2651   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2652   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2653                                             S.getDirectiveName().getAsString(),
2654                                             CodeGen, S.getLocStart(), Hint);
2655 }
2656 
2657 void CodeGenFunction::EmitOMPParallelForDirective(
2658     const OMPParallelForDirective &S) {
2659   // Emit directive as a combined directive that consists of two implicit
2660   // directives: 'parallel' with 'for' directive.
2661   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2662     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2663     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2664                                emitDispatchForLoopBounds);
2665   };
2666   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2667                                  emitEmptyBoundParameters);
2668 }
2669 
2670 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2671     const OMPParallelForSimdDirective &S) {
2672   // Emit directive as a combined directive that consists of two implicit
2673   // directives: 'parallel' with 'for' directive.
2674   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2675     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2676                                emitDispatchForLoopBounds);
2677   };
2678   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2679                                  emitEmptyBoundParameters);
2680 }
2681 
2682 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2683     const OMPParallelSectionsDirective &S) {
2684   // Emit directive as a combined directive that consists of two implicit
2685   // directives: 'parallel' with 'sections' directive.
2686   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2687     CGF.EmitSections(S);
2688   };
2689   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2690                                  emitEmptyBoundParameters);
2691 }
2692 
2693 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2694                                                 const RegionCodeGenTy &BodyGen,
2695                                                 const TaskGenTy &TaskGen,
2696                                                 OMPTaskDataTy &Data) {
2697   // Emit outlined function for task construct.
2698   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2699   auto *I = CS->getCapturedDecl()->param_begin();
2700   auto *PartId = std::next(I);
2701   auto *TaskT = std::next(I, 4);
2702   // Check if the task is final
2703   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2704     // If the condition constant folds and can be elided, try to avoid emitting
2705     // the condition and the dead arm of the if/else.
2706     auto *Cond = Clause->getCondition();
2707     bool CondConstant;
2708     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2709       Data.Final.setInt(CondConstant);
2710     else
2711       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2712   } else {
2713     // By default the task is not final.
2714     Data.Final.setInt(/*IntVal=*/false);
2715   }
2716   // Check if the task has 'priority' clause.
2717   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2718     auto *Prio = Clause->getPriority();
2719     Data.Priority.setInt(/*IntVal=*/true);
2720     Data.Priority.setPointer(EmitScalarConversion(
2721         EmitScalarExpr(Prio), Prio->getType(),
2722         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2723         Prio->getExprLoc()));
2724   }
2725   // The first function argument for tasks is a thread id, the second one is a
2726   // part id (0 for tied tasks, >=0 for untied task).
2727   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2728   // Get list of private variables.
2729   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2730     auto IRef = C->varlist_begin();
2731     for (auto *IInit : C->private_copies()) {
2732       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2733       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2734         Data.PrivateVars.push_back(*IRef);
2735         Data.PrivateCopies.push_back(IInit);
2736       }
2737       ++IRef;
2738     }
2739   }
2740   EmittedAsPrivate.clear();
2741   // Get list of firstprivate variables.
2742   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2743     auto IRef = C->varlist_begin();
2744     auto IElemInitRef = C->inits().begin();
2745     for (auto *IInit : C->private_copies()) {
2746       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2747       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2748         Data.FirstprivateVars.push_back(*IRef);
2749         Data.FirstprivateCopies.push_back(IInit);
2750         Data.FirstprivateInits.push_back(*IElemInitRef);
2751       }
2752       ++IRef;
2753       ++IElemInitRef;
2754     }
2755   }
2756   // Get list of lastprivate variables (for taskloops).
2757   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2758   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2759     auto IRef = C->varlist_begin();
2760     auto ID = C->destination_exprs().begin();
2761     for (auto *IInit : C->private_copies()) {
2762       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2763       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2764         Data.LastprivateVars.push_back(*IRef);
2765         Data.LastprivateCopies.push_back(IInit);
2766       }
2767       LastprivateDstsOrigs.insert(
2768           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2769            cast<DeclRefExpr>(*IRef)});
2770       ++IRef;
2771       ++ID;
2772     }
2773   }
2774   SmallVector<const Expr *, 4> LHSs;
2775   SmallVector<const Expr *, 4> RHSs;
2776   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2777     auto IPriv = C->privates().begin();
2778     auto IRed = C->reduction_ops().begin();
2779     auto ILHS = C->lhs_exprs().begin();
2780     auto IRHS = C->rhs_exprs().begin();
2781     for (const auto *Ref : C->varlists()) {
2782       Data.ReductionVars.emplace_back(Ref);
2783       Data.ReductionCopies.emplace_back(*IPriv);
2784       Data.ReductionOps.emplace_back(*IRed);
2785       LHSs.emplace_back(*ILHS);
2786       RHSs.emplace_back(*IRHS);
2787       std::advance(IPriv, 1);
2788       std::advance(IRed, 1);
2789       std::advance(ILHS, 1);
2790       std::advance(IRHS, 1);
2791     }
2792   }
2793   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2794       *this, S.getLocStart(), LHSs, RHSs, Data);
2795   // Build list of dependences.
2796   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2797     for (auto *IRef : C->varlists())
2798       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2799   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2800       CodeGenFunction &CGF, PrePostActionTy &Action) {
2801     // Set proper addresses for generated private copies.
2802     OMPPrivateScope Scope(CGF);
2803     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2804         !Data.LastprivateVars.empty()) {
2805       enum { PrivatesParam = 2, CopyFnParam = 3 };
2806       auto *CopyFn = CGF.Builder.CreateLoad(
2807           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2808       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2809           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2810       // Map privates.
2811       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2812       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2813       CallArgs.push_back(PrivatesPtr);
2814       for (auto *E : Data.PrivateVars) {
2815         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2816         Address PrivatePtr = CGF.CreateMemTemp(
2817             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2818         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2819         CallArgs.push_back(PrivatePtr.getPointer());
2820       }
2821       for (auto *E : Data.FirstprivateVars) {
2822         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2823         Address PrivatePtr =
2824             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2825                               ".firstpriv.ptr.addr");
2826         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2827         CallArgs.push_back(PrivatePtr.getPointer());
2828       }
2829       for (auto *E : Data.LastprivateVars) {
2830         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2831         Address PrivatePtr =
2832             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2833                               ".lastpriv.ptr.addr");
2834         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2835         CallArgs.push_back(PrivatePtr.getPointer());
2836       }
2837       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2838                                                           CopyFn, CallArgs);
2839       for (auto &&Pair : LastprivateDstsOrigs) {
2840         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2841         DeclRefExpr DRE(
2842             const_cast<VarDecl *>(OrigVD),
2843             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2844                 OrigVD) != nullptr,
2845             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2846         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2847           return CGF.EmitLValue(&DRE).getAddress();
2848         });
2849       }
2850       for (auto &&Pair : PrivatePtrs) {
2851         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2852                             CGF.getContext().getDeclAlign(Pair.first));
2853         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2854       }
2855     }
2856     if (Data.Reductions) {
2857       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2858       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2859                              Data.ReductionOps);
2860       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2861           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2862       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2863         RedCG.emitSharedLValue(CGF, Cnt);
2864         RedCG.emitAggregateType(CGF, Cnt);
2865         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2866             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2867         Replacement =
2868             Address(CGF.EmitScalarConversion(
2869                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2870                         CGF.getContext().getPointerType(
2871                             Data.ReductionCopies[Cnt]->getType()),
2872                         SourceLocation()),
2873                     Replacement.getAlignment());
2874         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2875         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2876                          [Replacement]() { return Replacement; });
2877         // FIXME: This must removed once the runtime library is fixed.
2878         // Emit required threadprivate variables for
2879         // initilizer/combiner/finalizer.
2880         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2881                                                            RedCG, Cnt);
2882       }
2883     }
2884     // Privatize all private variables except for in_reduction items.
2885     (void)Scope.Privatize();
2886     SmallVector<const Expr *, 4> InRedVars;
2887     SmallVector<const Expr *, 4> InRedPrivs;
2888     SmallVector<const Expr *, 4> InRedOps;
2889     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2890     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2891       auto IPriv = C->privates().begin();
2892       auto IRed = C->reduction_ops().begin();
2893       auto ITD = C->taskgroup_descriptors().begin();
2894       for (const auto *Ref : C->varlists()) {
2895         InRedVars.emplace_back(Ref);
2896         InRedPrivs.emplace_back(*IPriv);
2897         InRedOps.emplace_back(*IRed);
2898         TaskgroupDescriptors.emplace_back(*ITD);
2899         std::advance(IPriv, 1);
2900         std::advance(IRed, 1);
2901         std::advance(ITD, 1);
2902       }
2903     }
2904     // Privatize in_reduction items here, because taskgroup descriptors must be
2905     // privatized earlier.
2906     OMPPrivateScope InRedScope(CGF);
2907     if (!InRedVars.empty()) {
2908       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2909       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2910         RedCG.emitSharedLValue(CGF, Cnt);
2911         RedCG.emitAggregateType(CGF, Cnt);
2912         // The taskgroup descriptor variable is always implicit firstprivate and
2913         // privatized already during procoessing of the firstprivates.
2914         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2915             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2916         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2917             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2918         Replacement = Address(
2919             CGF.EmitScalarConversion(
2920                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2921                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2922                 SourceLocation()),
2923             Replacement.getAlignment());
2924         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2925         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2926                               [Replacement]() { return Replacement; });
2927         // FIXME: This must removed once the runtime library is fixed.
2928         // Emit required threadprivate variables for
2929         // initilizer/combiner/finalizer.
2930         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2931                                                            RedCG, Cnt);
2932       }
2933     }
2934     (void)InRedScope.Privatize();
2935 
2936     Action.Enter(CGF);
2937     BodyGen(CGF);
2938   };
2939   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2940       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2941       Data.NumberOfParts);
2942   OMPLexicalScope Scope(*this, S);
2943   TaskGen(*this, OutlinedFn, Data);
2944 }
2945 
2946 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2947   // Emit outlined function for task construct.
2948   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2949   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2950   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2951   const Expr *IfCond = nullptr;
2952   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2953     if (C->getNameModifier() == OMPD_unknown ||
2954         C->getNameModifier() == OMPD_task) {
2955       IfCond = C->getCondition();
2956       break;
2957     }
2958   }
2959 
2960   OMPTaskDataTy Data;
2961   // Check if we should emit tied or untied task.
2962   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2963   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2964     CGF.EmitStmt(CS->getCapturedStmt());
2965   };
2966   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2967                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2968                             const OMPTaskDataTy &Data) {
2969     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2970                                             SharedsTy, CapturedStruct, IfCond,
2971                                             Data);
2972   };
2973   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2974 }
2975 
2976 void CodeGenFunction::EmitOMPTaskyieldDirective(
2977     const OMPTaskyieldDirective &S) {
2978   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2979 }
2980 
2981 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2982   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2983 }
2984 
2985 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2986   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2987 }
2988 
2989 void CodeGenFunction::EmitOMPTaskgroupDirective(
2990     const OMPTaskgroupDirective &S) {
2991   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2992     Action.Enter(CGF);
2993     if (const Expr *E = S.getReductionRef()) {
2994       SmallVector<const Expr *, 4> LHSs;
2995       SmallVector<const Expr *, 4> RHSs;
2996       OMPTaskDataTy Data;
2997       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2998         auto IPriv = C->privates().begin();
2999         auto IRed = C->reduction_ops().begin();
3000         auto ILHS = C->lhs_exprs().begin();
3001         auto IRHS = C->rhs_exprs().begin();
3002         for (const auto *Ref : C->varlists()) {
3003           Data.ReductionVars.emplace_back(Ref);
3004           Data.ReductionCopies.emplace_back(*IPriv);
3005           Data.ReductionOps.emplace_back(*IRed);
3006           LHSs.emplace_back(*ILHS);
3007           RHSs.emplace_back(*IRHS);
3008           std::advance(IPriv, 1);
3009           std::advance(IRed, 1);
3010           std::advance(ILHS, 1);
3011           std::advance(IRHS, 1);
3012         }
3013       }
3014       llvm::Value *ReductionDesc =
3015           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
3016                                                            LHSs, RHSs, Data);
3017       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3018       CGF.EmitVarDecl(*VD);
3019       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
3020                             /*Volatile=*/false, E->getType());
3021     }
3022     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3023   };
3024   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3025   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3026 }
3027 
3028 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3029   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3030     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3031       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3032                                 FlushClause->varlist_end());
3033     }
3034     return llvm::None;
3035   }(), S.getLocStart());
3036 }
3037 
3038 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3039                                             const CodeGenLoopTy &CodeGenLoop,
3040                                             Expr *IncExpr) {
3041   // Emit the loop iteration variable.
3042   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3043   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3044   EmitVarDecl(*IVDecl);
3045 
3046   // Emit the iterations count variable.
3047   // If it is not a variable, Sema decided to calculate iterations count on each
3048   // iteration (e.g., it is foldable into a constant).
3049   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3050     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3051     // Emit calculation of the iterations count.
3052     EmitIgnoredExpr(S.getCalcLastIteration());
3053   }
3054 
3055   auto &RT = CGM.getOpenMPRuntime();
3056 
3057   bool HasLastprivateClause = false;
3058   // Check pre-condition.
3059   {
3060     OMPLoopScope PreInitScope(*this, S);
3061     // Skip the entire loop if we don't meet the precondition.
3062     // If the condition constant folds and can be elided, avoid emitting the
3063     // whole loop.
3064     bool CondConstant;
3065     llvm::BasicBlock *ContBlock = nullptr;
3066     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3067       if (!CondConstant)
3068         return;
3069     } else {
3070       auto *ThenBlock = createBasicBlock("omp.precond.then");
3071       ContBlock = createBasicBlock("omp.precond.end");
3072       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3073                   getProfileCount(&S));
3074       EmitBlock(ThenBlock);
3075       incrementProfileCounter(&S);
3076     }
3077 
3078     // Emit 'then' code.
3079     {
3080       // Emit helper vars inits.
3081 
3082       LValue LB = EmitOMPHelperVar(
3083           *this, cast<DeclRefExpr>(
3084                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3085                           ? S.getCombinedLowerBoundVariable()
3086                           : S.getLowerBoundVariable())));
3087       LValue UB = EmitOMPHelperVar(
3088           *this, cast<DeclRefExpr>(
3089                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3090                           ? S.getCombinedUpperBoundVariable()
3091                           : S.getUpperBoundVariable())));
3092       LValue ST =
3093           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3094       LValue IL =
3095           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3096 
3097       OMPPrivateScope LoopScope(*this);
3098       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3099         // Emit implicit barrier to synchronize threads and avoid data races on
3100         // initialization of firstprivate variables and post-update of
3101         // lastprivate variables.
3102         CGM.getOpenMPRuntime().emitBarrierCall(
3103           *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3104           /*ForceSimpleCall=*/true);
3105       }
3106       EmitOMPPrivateClause(S, LoopScope);
3107       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3108       EmitOMPPrivateLoopCounters(S, LoopScope);
3109       (void)LoopScope.Privatize();
3110 
3111       // Detect the distribute schedule kind and chunk.
3112       llvm::Value *Chunk = nullptr;
3113       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3114       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3115         ScheduleKind = C->getDistScheduleKind();
3116         if (const auto *Ch = C->getChunkSize()) {
3117           Chunk = EmitScalarExpr(Ch);
3118           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3119           S.getIterationVariable()->getType(),
3120           S.getLocStart());
3121         }
3122       }
3123       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3124       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3125 
3126       // OpenMP [2.10.8, distribute Construct, Description]
3127       // If dist_schedule is specified, kind must be static. If specified,
3128       // iterations are divided into chunks of size chunk_size, chunks are
3129       // assigned to the teams of the league in a round-robin fashion in the
3130       // order of the team number. When no chunk_size is specified, the
3131       // iteration space is divided into chunks that are approximately equal
3132       // in size, and at most one chunk is distributed to each team of the
3133       // league. The size of the chunks is unspecified in this case.
3134       if (RT.isStaticNonchunked(ScheduleKind,
3135                                 /* Chunked */ Chunk != nullptr)) {
3136         CGOpenMPRuntime::StaticRTInput StaticInit(
3137             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3138             LB.getAddress(), UB.getAddress(), ST.getAddress());
3139         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3140                                     StaticInit);
3141         auto LoopExit =
3142             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3143         // UB = min(UB, GlobalUB);
3144         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3145                             ? S.getCombinedEnsureUpperBound()
3146                             : S.getEnsureUpperBound());
3147         // IV = LB;
3148         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3149                             ? S.getCombinedInit()
3150                             : S.getInit());
3151 
3152         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3153                          ? S.getCombinedCond()
3154                          : S.getCond();
3155 
3156         // for distribute alone,  codegen
3157         // while (idx <= UB) { BODY; ++idx; }
3158         // when combined with 'for' (e.g. as in 'distribute parallel for')
3159         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3160         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3161                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3162                            CodeGenLoop(CGF, S, LoopExit);
3163                          },
3164                          [](CodeGenFunction &) {});
3165         EmitBlock(LoopExit.getBlock());
3166         // Tell the runtime we are done.
3167         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3168       } else {
3169         // Emit the outer loop, which requests its work chunk [LB..UB] from
3170         // runtime and runs the inner loop to process it.
3171         const OMPLoopArguments LoopArguments = {
3172             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3173             Chunk};
3174         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3175                                    CodeGenLoop);
3176       }
3177 
3178       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3179       if (HasLastprivateClause)
3180         EmitOMPLastprivateClauseFinal(
3181             S, /*NoFinals=*/false,
3182             Builder.CreateIsNotNull(
3183                 EmitLoadOfScalar(IL, S.getLocStart())));
3184     }
3185 
3186     // We're now done with the loop, so jump to the continuation block.
3187     if (ContBlock) {
3188       EmitBranch(ContBlock);
3189       EmitBlock(ContBlock, true);
3190     }
3191   }
3192 }
3193 
3194 void CodeGenFunction::EmitOMPDistributeDirective(
3195     const OMPDistributeDirective &S) {
3196   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3197 
3198     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3199   };
3200   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3201   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3202 }
3203 
3204 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3205                                                    const CapturedStmt *S) {
3206   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3207   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3208   CGF.CapturedStmtInfo = &CapStmtInfo;
3209   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3210   Fn->addFnAttr(llvm::Attribute::NoInline);
3211   return Fn;
3212 }
3213 
3214 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3215   if (!S.getAssociatedStmt()) {
3216     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3217       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3218     return;
3219   }
3220   auto *C = S.getSingleClause<OMPSIMDClause>();
3221   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3222                                  PrePostActionTy &Action) {
3223     if (C) {
3224       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3225       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3226       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3227       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3228       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3229                                                       OutlinedFn, CapturedVars);
3230     } else {
3231       Action.Enter(CGF);
3232       CGF.EmitStmt(
3233           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3234     }
3235   };
3236   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3237   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3238 }
3239 
3240 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3241                                          QualType SrcType, QualType DestType,
3242                                          SourceLocation Loc) {
3243   assert(CGF.hasScalarEvaluationKind(DestType) &&
3244          "DestType must have scalar evaluation kind.");
3245   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3246   return Val.isScalar()
3247              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3248                                         Loc)
3249              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3250                                                  DestType, Loc);
3251 }
3252 
3253 static CodeGenFunction::ComplexPairTy
3254 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3255                       QualType DestType, SourceLocation Loc) {
3256   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3257          "DestType must have complex evaluation kind.");
3258   CodeGenFunction::ComplexPairTy ComplexVal;
3259   if (Val.isScalar()) {
3260     // Convert the input element to the element type of the complex.
3261     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3262     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3263                                               DestElementType, Loc);
3264     ComplexVal = CodeGenFunction::ComplexPairTy(
3265         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3266   } else {
3267     assert(Val.isComplex() && "Must be a scalar or complex.");
3268     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3269     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3270     ComplexVal.first = CGF.EmitScalarConversion(
3271         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3272     ComplexVal.second = CGF.EmitScalarConversion(
3273         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3274   }
3275   return ComplexVal;
3276 }
3277 
3278 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3279                                   LValue LVal, RValue RVal) {
3280   if (LVal.isGlobalReg()) {
3281     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3282   } else {
3283     CGF.EmitAtomicStore(RVal, LVal,
3284                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3285                                  : llvm::AtomicOrdering::Monotonic,
3286                         LVal.isVolatile(), /*IsInit=*/false);
3287   }
3288 }
3289 
3290 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3291                                          QualType RValTy, SourceLocation Loc) {
3292   switch (getEvaluationKind(LVal.getType())) {
3293   case TEK_Scalar:
3294     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3295                                *this, RVal, RValTy, LVal.getType(), Loc)),
3296                            LVal);
3297     break;
3298   case TEK_Complex:
3299     EmitStoreOfComplex(
3300         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3301         /*isInit=*/false);
3302     break;
3303   case TEK_Aggregate:
3304     llvm_unreachable("Must be a scalar or complex.");
3305   }
3306 }
3307 
3308 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3309                                   const Expr *X, const Expr *V,
3310                                   SourceLocation Loc) {
3311   // v = x;
3312   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3313   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3314   LValue XLValue = CGF.EmitLValue(X);
3315   LValue VLValue = CGF.EmitLValue(V);
3316   RValue Res = XLValue.isGlobalReg()
3317                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3318                    : CGF.EmitAtomicLoad(
3319                          XLValue, Loc,
3320                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3321                                   : llvm::AtomicOrdering::Monotonic,
3322                          XLValue.isVolatile());
3323   // OpenMP, 2.12.6, atomic Construct
3324   // Any atomic construct with a seq_cst clause forces the atomically
3325   // performed operation to include an implicit flush operation without a
3326   // list.
3327   if (IsSeqCst)
3328     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3329   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3330 }
3331 
3332 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3333                                    const Expr *X, const Expr *E,
3334                                    SourceLocation Loc) {
3335   // x = expr;
3336   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3337   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3338   // OpenMP, 2.12.6, atomic Construct
3339   // Any atomic construct with a seq_cst clause forces the atomically
3340   // performed operation to include an implicit flush operation without a
3341   // list.
3342   if (IsSeqCst)
3343     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3344 }
3345 
3346 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3347                                                 RValue Update,
3348                                                 BinaryOperatorKind BO,
3349                                                 llvm::AtomicOrdering AO,
3350                                                 bool IsXLHSInRHSPart) {
3351   auto &Context = CGF.CGM.getContext();
3352   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3353   // expression is simple and atomic is allowed for the given type for the
3354   // target platform.
3355   if (BO == BO_Comma || !Update.isScalar() ||
3356       !Update.getScalarVal()->getType()->isIntegerTy() ||
3357       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3358                         (Update.getScalarVal()->getType() !=
3359                          X.getAddress().getElementType())) ||
3360       !X.getAddress().getElementType()->isIntegerTy() ||
3361       !Context.getTargetInfo().hasBuiltinAtomic(
3362           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3363     return std::make_pair(false, RValue::get(nullptr));
3364 
3365   llvm::AtomicRMWInst::BinOp RMWOp;
3366   switch (BO) {
3367   case BO_Add:
3368     RMWOp = llvm::AtomicRMWInst::Add;
3369     break;
3370   case BO_Sub:
3371     if (!IsXLHSInRHSPart)
3372       return std::make_pair(false, RValue::get(nullptr));
3373     RMWOp = llvm::AtomicRMWInst::Sub;
3374     break;
3375   case BO_And:
3376     RMWOp = llvm::AtomicRMWInst::And;
3377     break;
3378   case BO_Or:
3379     RMWOp = llvm::AtomicRMWInst::Or;
3380     break;
3381   case BO_Xor:
3382     RMWOp = llvm::AtomicRMWInst::Xor;
3383     break;
3384   case BO_LT:
3385     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3386                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3387                                    : llvm::AtomicRMWInst::Max)
3388                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3389                                    : llvm::AtomicRMWInst::UMax);
3390     break;
3391   case BO_GT:
3392     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3393                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3394                                    : llvm::AtomicRMWInst::Min)
3395                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3396                                    : llvm::AtomicRMWInst::UMin);
3397     break;
3398   case BO_Assign:
3399     RMWOp = llvm::AtomicRMWInst::Xchg;
3400     break;
3401   case BO_Mul:
3402   case BO_Div:
3403   case BO_Rem:
3404   case BO_Shl:
3405   case BO_Shr:
3406   case BO_LAnd:
3407   case BO_LOr:
3408     return std::make_pair(false, RValue::get(nullptr));
3409   case BO_PtrMemD:
3410   case BO_PtrMemI:
3411   case BO_LE:
3412   case BO_GE:
3413   case BO_EQ:
3414   case BO_NE:
3415   case BO_AddAssign:
3416   case BO_SubAssign:
3417   case BO_AndAssign:
3418   case BO_OrAssign:
3419   case BO_XorAssign:
3420   case BO_MulAssign:
3421   case BO_DivAssign:
3422   case BO_RemAssign:
3423   case BO_ShlAssign:
3424   case BO_ShrAssign:
3425   case BO_Comma:
3426     llvm_unreachable("Unsupported atomic update operation");
3427   }
3428   auto *UpdateVal = Update.getScalarVal();
3429   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3430     UpdateVal = CGF.Builder.CreateIntCast(
3431         IC, X.getAddress().getElementType(),
3432         X.getType()->hasSignedIntegerRepresentation());
3433   }
3434   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3435   return std::make_pair(true, RValue::get(Res));
3436 }
3437 
3438 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3439     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3440     llvm::AtomicOrdering AO, SourceLocation Loc,
3441     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3442   // Update expressions are allowed to have the following forms:
3443   // x binop= expr; -> xrval + expr;
3444   // x++, ++x -> xrval + 1;
3445   // x--, --x -> xrval - 1;
3446   // x = x binop expr; -> xrval binop expr
3447   // x = expr Op x; - > expr binop xrval;
3448   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3449   if (!Res.first) {
3450     if (X.isGlobalReg()) {
3451       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3452       // 'xrval'.
3453       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3454     } else {
3455       // Perform compare-and-swap procedure.
3456       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3457     }
3458   }
3459   return Res;
3460 }
3461 
3462 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3463                                     const Expr *X, const Expr *E,
3464                                     const Expr *UE, bool IsXLHSInRHSPart,
3465                                     SourceLocation Loc) {
3466   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3467          "Update expr in 'atomic update' must be a binary operator.");
3468   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3469   // Update expressions are allowed to have the following forms:
3470   // x binop= expr; -> xrval + expr;
3471   // x++, ++x -> xrval + 1;
3472   // x--, --x -> xrval - 1;
3473   // x = x binop expr; -> xrval binop expr
3474   // x = expr Op x; - > expr binop xrval;
3475   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3476   LValue XLValue = CGF.EmitLValue(X);
3477   RValue ExprRValue = CGF.EmitAnyExpr(E);
3478   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3479                      : llvm::AtomicOrdering::Monotonic;
3480   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3481   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3482   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3483   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3484   auto Gen =
3485       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3486         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3487         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3488         return CGF.EmitAnyExpr(UE);
3489       };
3490   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3491       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3492   // OpenMP, 2.12.6, atomic Construct
3493   // Any atomic construct with a seq_cst clause forces the atomically
3494   // performed operation to include an implicit flush operation without a
3495   // list.
3496   if (IsSeqCst)
3497     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3498 }
3499 
3500 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3501                             QualType SourceType, QualType ResType,
3502                             SourceLocation Loc) {
3503   switch (CGF.getEvaluationKind(ResType)) {
3504   case TEK_Scalar:
3505     return RValue::get(
3506         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3507   case TEK_Complex: {
3508     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3509     return RValue::getComplex(Res.first, Res.second);
3510   }
3511   case TEK_Aggregate:
3512     break;
3513   }
3514   llvm_unreachable("Must be a scalar or complex.");
3515 }
3516 
/// Emit code for an 'omp atomic capture' construct: 'x' is updated atomically
/// and either its old or its new value is stored to 'v'.
///
/// \param CGF Code generation state of the function being emitted.
/// \param IsSeqCst True if the construct carries a 'seq_cst' clause.
/// \param IsPostfixUpdate True if 'v' receives the value 'x' had before the
/// update, false if it receives the updated value.
/// \param V Location that receives the captured value; must be an lvalue.
/// \param X Location that is updated; must be an lvalue.
/// \param E The 'expr' operand of the update or assignment.
/// \param UE Update expression, or null if 'x' is simply overwritten with
/// \a E.
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \a UE.
/// \param Loc Source location used for emitted loads, stores and the flush.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value that is finally stored to 'v'; set either by the generator lambdas
  // below or from the 'atomicrmw' result.
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // Type of NewVVal, used as the source type of the store to 'v'.
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Evaluates the update expression for a given value of 'x' and records
    // the value to capture (old value for postfix form, new value otherwise).
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    // Convert 'expr' to the type of 'x' before it is stored.
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // Records the value of 'x' it is given and yields the new value 'expr'.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3597 
/// Dispatch code generation for an 'omp atomic' construct according to its
/// kind clause.
///
/// \param Kind Clause selecting the atomic form: OMPC_read, OMPC_write,
/// OMPC_update or OMPC_capture. OMPC_unknown is handled like OMPC_update.
/// Any other clause kind is unreachable.
/// \param IsSeqCst True if the construct carries a 'seq_cst' clause.
/// \param IsPostfixUpdate Forwarded to the 'capture' form only.
/// \param X The 'x' expression of the construct.
/// \param V The 'v' expression (used by 'read' and 'capture').
/// \param E The 'expr' expression (used by 'write', 'update' and 'capture').
/// \param UE The update expression (used by 'update' and 'capture').
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \a UE.
/// \param Loc Source location forwarded to the selected emitter.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  // No kind clause at all is treated as 'update'.
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    // Note the argument order: the capture emitter takes 'v' before 'x'.
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  // Every remaining clause kind is listed explicitly (no 'default') so the
  // switch stays exhaustive over OpenMPClauseKind.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
3667 
3668 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3669   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3670   OpenMPClauseKind Kind = OMPC_unknown;
3671   for (auto *C : S.clauses()) {
3672     // Find first clause (skip seq_cst clause, if it is first).
3673     if (C->getClauseKind() != OMPC_seq_cst) {
3674       Kind = C->getClauseKind();
3675       break;
3676     }
3677   }
3678 
3679   const auto *CS =
3680       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3681   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3682     enterFullExpression(EWC);
3683   }
3684   // Processing for statements under 'atomic capture'.
3685   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3686     for (const auto *C : Compound->body()) {
3687       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3688         enterFullExpression(EWC);
3689       }
3690     }
3691   }
3692 
3693   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3694                                             PrePostActionTy &) {
3695     CGF.EmitStopPoint(CS);
3696     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3697                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3698                       S.isXLHSInRHSPart(), S.getLocStart());
3699   };
3700   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3701   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3702 }
3703 
3704 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3705                                          const OMPExecutableDirective &S,
3706                                          const RegionCodeGenTy &CodeGen) {
3707   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3708   CodeGenModule &CGM = CGF.CGM;
3709   const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target);
3710 
3711   llvm::Function *Fn = nullptr;
3712   llvm::Constant *FnID = nullptr;
3713 
3714   const Expr *IfCond = nullptr;
3715   // Check for the at most one if clause associated with the target region.
3716   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3717     if (C->getNameModifier() == OMPD_unknown ||
3718         C->getNameModifier() == OMPD_target) {
3719       IfCond = C->getCondition();
3720       break;
3721     }
3722   }
3723 
3724   // Check if we have any device clause associated with the directive.
3725   const Expr *Device = nullptr;
3726   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3727     Device = C->getDevice();
3728   }
3729 
3730   // Check if we have an if clause whose conditional always evaluates to false
3731   // or if we do not have any targets specified. If so the target region is not
3732   // an offload entry point.
3733   bool IsOffloadEntry = true;
3734   if (IfCond) {
3735     bool Val;
3736     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3737       IsOffloadEntry = false;
3738   }
3739   if (CGM.getLangOpts().OMPTargetTriples.empty())
3740     IsOffloadEntry = false;
3741 
3742   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3743   StringRef ParentName;
3744   // In case we have Ctors/Dtors we use the complete type variant to produce
3745   // the mangling of the device outlined kernel.
3746   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3747     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3748   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3749     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3750   else
3751     ParentName =
3752         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3753 
3754   // Emit target region as a standalone region.
3755   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3756                                                     IsOffloadEntry, CodeGen);
3757   OMPLexicalScope Scope(CGF, S);
3758   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3759   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3760   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3761                                         CapturedVars);
3762 }
3763 
/// Emit the body of a 'target' region.
///
/// Firstprivate and private clauses of \a S are privatized first, then the
/// pre-action is entered and the captured statement of the directive is
/// emitted.
///
/// \param CGF Code generation state of the function receiving the body.
/// \param S The 'target' directive.
/// \param Action Pre/post action entered before the body is emitted.
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();

  Action.Enter(CGF);
  CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}
3774 
3775 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3776                                                   StringRef ParentName,
3777                                                   const OMPTargetDirective &S) {
3778   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3779     emitTargetRegion(CGF, S, Action);
3780   };
3781   llvm::Function *Fn;
3782   llvm::Constant *Addr;
3783   // Emit target region as a standalone region.
3784   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3785       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3786   assert(Fn && Addr && "Target device function emission failed.");
3787 }
3788 
/// Emit code for '#pragma omp target': the region body is generated by
/// emitTargetRegion and the outlining and launch are handled by
/// emitCommonOMPTargetDirective.
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
3795 
3796 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3797                                         const OMPExecutableDirective &S,
3798                                         OpenMPDirectiveKind InnermostKind,
3799                                         const RegionCodeGenTy &CodeGen) {
3800   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3801   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3802       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3803 
3804   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3805   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3806   if (NT || TL) {
3807     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3808     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3809 
3810     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3811                                                   S.getLocStart());
3812   }
3813 
3814   OMPTeamsScope Scope(CGF, S);
3815   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3816   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3817   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3818                                            CapturedVars);
3819 }
3820 
/// Emit code for '#pragma omp teams'.
///
/// The region body privatizes firstprivate/private/reduction variables,
/// emits the captured statement and finalizes the reductions. The region is
/// outlined and called through emitCommonOMPTeamsDirective, after which the
/// post-update expressions of the reduction clauses are emitted.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  // NOTE(review): the innermost kind passed here is OMPD_distribute although
  // this is a plain 'teams' directive, while emitTargetTeamsRegion passes
  // OMPD_teams - confirm this is intended.
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  // The post-update is emitted without a guarding condition (the condition
  // generator yields null).
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
3836 
/// Emit the body of a 'target teams' region.
///
/// The pre-action of the target region is entered first; the nested 'teams'
/// region is then outlined and called through emitCommonOMPTeamsDirective and
/// the post-update expressions of the reduction clauses are emitted.
///
/// \param CGF Code generation state of the function receiving the body.
/// \param Action Pre/post action of the enclosing target region.
/// \param S The 'target teams' directive.
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  // Note: inside the lambda, 'Action' is the lambda's own parameter and
  // shadows the parameter of this function.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    Action.Enter(CGF);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  // The post-update is emitted without a guarding condition (the condition
  // generator yields null).
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
3856 
3857 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3858     CodeGenModule &CGM, StringRef ParentName,
3859     const OMPTargetTeamsDirective &S) {
3860   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3861     emitTargetTeamsRegion(CGF, Action, S);
3862   };
3863   llvm::Function *Fn;
3864   llvm::Constant *Addr;
3865   // Emit target region as a standalone region.
3866   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3867       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3868   assert(Fn && Addr && "Target device function emission failed.");
3869 }
3870 
/// Emit code for '#pragma omp target teams': the region body is generated by
/// emitTargetTeamsRegion and the outlining and launch are handled by
/// emitCommonOMPTargetDirective.
void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
3878 
3879 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
3880     const OMPTeamsDistributeDirective &S) {
3881 
3882   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3883     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3884   };
3885 
3886   // Emit teams region as a standalone region.
3887   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3888                                             PrePostActionTy &) {
3889     OMPPrivateScope PrivateScope(CGF);
3890     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3891     (void)PrivateScope.Privatize();
3892     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3893                                                     CodeGenDistribute);
3894     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3895   };
3896   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
3897   emitPostUpdateForReductionClause(*this, S,
3898                                    [](CodeGenFunction &) { return nullptr; });
3899 }
3900 
3901 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
3902     const OMPTeamsDistributeParallelForDirective &S) {
3903   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3904     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3905                               S.getDistInc());
3906   };
3907 
3908   // Emit teams region as a standalone region.
3909   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3910                                             PrePostActionTy &) {
3911     OMPPrivateScope PrivateScope(CGF);
3912     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3913     (void)PrivateScope.Privatize();
3914     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3915                                                     CodeGenDistribute);
3916     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3917   };
3918   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
3919   emitPostUpdateForReductionClause(*this, S,
3920                                    [](CodeGenFunction &) { return nullptr; });
3921 }
3922 
/// Emit code for '#pragma omp cancellation point' by delegating to the OpenMP
/// runtime with the directive's start location and cancel region.
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}
3928 
3929 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3930   const Expr *IfCond = nullptr;
3931   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3932     if (C->getNameModifier() == OMPD_unknown ||
3933         C->getNameModifier() == OMPD_cancel) {
3934       IfCond = C->getCondition();
3935       break;
3936     }
3937   }
3938   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3939                                         S.getCancelRegion());
3940 }
3941 
3942 CodeGenFunction::JumpDest
3943 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3944   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3945       Kind == OMPD_target_parallel)
3946     return ReturnBlock;
3947   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3948          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3949          Kind == OMPD_distribute_parallel_for ||
3950          Kind == OMPD_target_parallel_for ||
3951          Kind == OMPD_teams_distribute_parallel_for ||
3952          Kind == OMPD_target_teams_distribute_parallel_for);
3953   return OMPCancelStack.getExitBlock();
3954 }
3955 
3956 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3957     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3958     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3959   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3960   auto OrigVarIt = C.varlist_begin();
3961   auto InitIt = C.inits().begin();
3962   for (auto PvtVarIt : C.private_copies()) {
3963     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3964     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3965     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3966 
3967     // In order to identify the right initializer we need to match the
3968     // declaration used by the mapping logic. In some cases we may get
3969     // OMPCapturedExprDecl that refers to the original declaration.
3970     const ValueDecl *MatchingVD = OrigVD;
3971     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3972       // OMPCapturedExprDecl are used to privative fields of the current
3973       // structure.
3974       auto *ME = cast<MemberExpr>(OED->getInit());
3975       assert(isa<CXXThisExpr>(ME->getBase()) &&
3976              "Base should be the current struct!");
3977       MatchingVD = ME->getMemberDecl();
3978     }
3979 
3980     // If we don't have information about the current list item, move on to
3981     // the next one.
3982     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3983     if (InitAddrIt == CaptureDeviceAddrMap.end())
3984       continue;
3985 
3986     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3987       // Initialize the temporary initialization variable with the address we
3988       // get from the runtime library. We have to cast the source address
3989       // because it is always a void *. References are materialized in the
3990       // privatization scope, so the initialization here disregards the fact
3991       // the original variable is a reference.
3992       QualType AddrQTy =
3993           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3994       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3995       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3996       setAddrOfLocalVar(InitVD, InitAddr);
3997 
3998       // Emit private declaration, it will be initialized by the value we
3999       // declaration we just added to the local declarations map.
4000       EmitDecl(*PvtVD);
4001 
4002       // The initialization variables reached its purpose in the emission
4003       // ofthe previous declaration, so we don't need it anymore.
4004       LocalDeclMap.erase(InitVD);
4005 
4006       // Return the address of the private variable.
4007       return GetAddrOfLocalVar(PvtVD);
4008     });
4009     assert(IsRegistered && "firstprivate var already registered as private");
4010     // Silence the warning about unused variable.
4011     (void)IsRegistered;
4012 
4013     ++OrigVarIt;
4014     ++InitIt;
4015   }
4016 }
4017 
/// Generate the instructions for '#pragma omp target data' directive.
///
/// Without offload targets only the region body is emitted. Otherwise the
/// 'if' and 'device' clauses are collected and the OpenMP runtime emits the
/// data mapping calls around the region, using a pre-action to tell the
/// region code generation whether 'use_device_ptr' items must be privatized.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  // Action whose Enter() sets the referenced flag to true.
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emits the statement associated with the directive.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      // Reset the flag; only the pre-action below may turn it on.
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        // The private scope stays alive while the region body is emitted.
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
4107 
4108 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4109     const OMPTargetEnterDataDirective &S) {
4110   // If we don't have target devices, don't bother emitting the data mapping
4111   // code.
4112   if (CGM.getLangOpts().OMPTargetTriples.empty())
4113     return;
4114 
4115   // Check if we have any if clause associated with the directive.
4116   const Expr *IfCond = nullptr;
4117   if (auto *C = S.getSingleClause<OMPIfClause>())
4118     IfCond = C->getCondition();
4119 
4120   // Check if we have any device clause associated with the directive.
4121   const Expr *Device = nullptr;
4122   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4123     Device = C->getDevice();
4124 
4125   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4126                                         PrePostActionTy &) {
4127     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4128                                                             Device);
4129   };
4130   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4131   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data,
4132                                               CodeGen);
4133 }
4134 
4135 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4136     const OMPTargetExitDataDirective &S) {
4137   // If we don't have target devices, don't bother emitting the data mapping
4138   // code.
4139   if (CGM.getLangOpts().OMPTargetTriples.empty())
4140     return;
4141 
4142   // Check if we have any if clause associated with the directive.
4143   const Expr *IfCond = nullptr;
4144   if (auto *C = S.getSingleClause<OMPIfClause>())
4145     IfCond = C->getCondition();
4146 
4147   // Check if we have any device clause associated with the directive.
4148   const Expr *Device = nullptr;
4149   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4150     Device = C->getDevice();
4151 
4152   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4153                                         PrePostActionTy &) {
4154     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4155                                                             Device);
4156   };
4157   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4158   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data,
4159                                               CodeGen);
4160 }
4161 
4162 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4163                                      const OMPTargetParallelDirective &S,
4164                                      PrePostActionTy &Action) {
4165   // Get the captured statement associated with the 'parallel' region.
4166   auto *CS = S.getCapturedStmt(OMPD_parallel);
4167   Action.Enter(CGF);
4168   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4169     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4170     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4171     CGF.EmitOMPPrivateClause(S, PrivateScope);
4172     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4173     (void)PrivateScope.Privatize();
4174     // TODO: Add support for clauses.
4175     CGF.EmitStmt(CS->getCapturedStmt());
4176     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4177   };
4178   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4179                                  emitEmptyBoundParameters);
4180   emitPostUpdateForReductionClause(
4181       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4182 }
4183 
4184 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4185     CodeGenModule &CGM, StringRef ParentName,
4186     const OMPTargetParallelDirective &S) {
4187   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4188     emitTargetParallelRegion(CGF, S, Action);
4189   };
4190   llvm::Function *Fn;
4191   llvm::Constant *Addr;
4192   // Emit target region as a standalone region.
4193   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4194       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4195   assert(Fn && Addr && "Target device function emission failed.");
4196 }
4197 
4198 void CodeGenFunction::EmitOMPTargetParallelDirective(
4199     const OMPTargetParallelDirective &S) {
4200   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4201     emitTargetParallelRegion(CGF, S, Action);
4202   };
4203   emitCommonOMPTargetDirective(*this, S, CodeGen);
4204 }
4205 
4206 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4207                                         const OMPTargetParallelForDirective &S,
4208                                         PrePostActionTy &Action) {
4209   Action.Enter(CGF);
4210   // Emit directive as a combined directive that consists of two implicit
4211   // directives: 'parallel' with 'for' directive.
4212   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4213     CodeGenFunction::OMPCancelStackRAII CancelRegion(
4214         CGF, OMPD_target_parallel_for, S.hasCancel());
4215     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4216                                emitDispatchForLoopBounds);
4217   };
4218   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4219                                  emitEmptyBoundParameters);
4220 }
4221 
4222 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4223     CodeGenModule &CGM, StringRef ParentName,
4224     const OMPTargetParallelForDirective &S) {
4225   // Emit SPMD target parallel for region as a standalone region.
4226   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4227     emitTargetParallelForRegion(CGF, S, Action);
4228   };
4229   llvm::Function *Fn;
4230   llvm::Constant *Addr;
4231   // Emit target region as a standalone region.
4232   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4233       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4234   assert(Fn && Addr && "Target device function emission failed.");
4235 }
4236 
4237 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4238     const OMPTargetParallelForDirective &S) {
4239   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4240     emitTargetParallelForRegion(CGF, S, Action);
4241   };
4242   emitCommonOMPTargetDirective(*this, S, CodeGen);
4243 }
4244 
4245 static void
4246 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4247                                 const OMPTargetParallelForSimdDirective &S,
4248                                 PrePostActionTy &Action) {
4249   Action.Enter(CGF);
4250   // Emit directive as a combined directive that consists of two implicit
4251   // directives: 'parallel' with 'for' directive.
4252   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4253     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4254                                emitDispatchForLoopBounds);
4255   };
4256   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4257                                  emitEmptyBoundParameters);
4258 }
4259 
4260 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4261     CodeGenModule &CGM, StringRef ParentName,
4262     const OMPTargetParallelForSimdDirective &S) {
4263   // Emit SPMD target parallel for region as a standalone region.
4264   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4265     emitTargetParallelForSimdRegion(CGF, S, Action);
4266   };
4267   llvm::Function *Fn;
4268   llvm::Constant *Addr;
4269   // Emit target region as a standalone region.
4270   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4271       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4272   assert(Fn && Addr && "Target device function emission failed.");
4273 }
4274 
4275 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4276     const OMPTargetParallelForSimdDirective &S) {
4277   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4278     emitTargetParallelForSimdRegion(CGF, S, Action);
4279   };
4280   emitCommonOMPTargetDirective(*this, S, CodeGen);
4281 }
4282 
4283 /// Emit a helper variable and return corresponding lvalue.
4284 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4285                      const ImplicitParamDecl *PVD,
4286                      CodeGenFunction::OMPPrivateScope &Privates) {
4287   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4288   Privates.addPrivate(
4289       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4290 }
4291 
/// Emit code for a taskloop-based loop directive.
///
/// Collects the 'if', 'nogroup', 'grainsize' and 'num_tasks' clause
/// information into an OMPTaskDataTy, builds a generator for the loop body
/// (BodyGen) and a generator for the runtime taskloop call (TaskGen), and
/// passes both to EmitOMPTaskBasedDirective. Unless 'nogroup' is present, the
/// whole emission is wrapped in a taskgroup region.
///
/// \param S The loop directive; asserted to be a taskloop directive.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Use the first 'if' clause that either has no directive-name modifier or
  // names 'taskloop' explicitly.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop
  // The integer flag of Data.Schedule distinguishes the two clauses (false for
  // 'grainsize', true for 'num_tasks'); the pointer holds the emitted value.
  // When neither clause is present, Data.Schedule is left untouched.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Generator for the loop body.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      // Precondition is a compile-time constant: if false, emit nothing.
      if (!CondConstant)
        return;
    } else {
      // Precondition must be evaluated at run time: branch around the loop.
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // Offsets of the lower-bound, upper-bound, stride and last-iteration
    // parameters within the captured declaration's parameter list.
    // NOTE(review): the starting offset 5 presumably mirrors the parameter
    // layout of the outlined taskloop function built elsewhere - confirm.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    // Map each loop helper variable onto the corresponding parameter.
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the loop itself; the second callback (post-increment action) is
    // intentionally empty.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Generator for the runtime taskloop call, emitted as an inlined
  // 'taskloop' region.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  // With 'nogroup' the task is emitted directly; otherwise the same emission
  // happens inside a taskgroup region.
  if (Data.Nogroup)
    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
  else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
        },
        S.getLocStart());
  }
}
4430 
4431 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4432   EmitOMPTaskLoopBasedDirective(S);
4433 }
4434 
4435 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4436     const OMPTaskLoopSimdDirective &S) {
4437   EmitOMPTaskLoopBasedDirective(S);
4438 }
4439 
4440 // Generate the instructions for '#pragma omp target update' directive.
4441 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4442     const OMPTargetUpdateDirective &S) {
4443   // If we don't have target devices, don't bother emitting the data mapping
4444   // code.
4445   if (CGM.getLangOpts().OMPTargetTriples.empty())
4446     return;
4447 
4448   // Check if we have any if clause associated with the directive.
4449   const Expr *IfCond = nullptr;
4450   if (auto *C = S.getSingleClause<OMPIfClause>())
4451     IfCond = C->getCondition();
4452 
4453   // Check if we have any device clause associated with the directive.
4454   const Expr *Device = nullptr;
4455   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4456     Device = C->getDevice();
4457 
4458   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4459                                         PrePostActionTy &) {
4460     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4461                                                             Device);
4462   };
4463   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4464   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update,
4465                                               CodeGen);
4466 }
4467