1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emits the pre-init statements attached to the directive's clauses:
  /// helper variables that materialize captured clause expressions must be
  /// emitted before the construct body itself.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // Declarations marked with OMPCaptureNoInitAttr get storage and
              // cleanups, but their initializer is intentionally not emitted.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  /// Privatization scope used to remap captured variables to the addresses
  /// produced by re-evaluating them as DeclRefExprs in the current context.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the enclosing lambda, captured
  /// statement, or block, and so must be accessed through the capture rather
  /// than directly.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  /// \param CapturedRegion When set, the captures of the corresponding
  /// captured statement are privatized so the region can be emitted inline.
  /// \param EmitPreInitStmt When true, clause pre-init statements are emitted
  /// on scope entry.
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        // Re-derive the variable's address through a synthesized reference so
        // lambda/block/captured-statement captures are resolved correctly.
        DeclRefExpr DRE(
            const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
87 
88 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
89 /// for captured expressions.
90 class OMPParallelScope final : public OMPLexicalScope {
91   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
92     OpenMPDirectiveKind Kind = S.getDirectiveKind();
93     return !(isOpenMPTargetExecutionDirective(Kind) ||
94              isOpenMPLoopBoundSharingDirective(Kind)) &&
95            isOpenMPParallelDirective(Kind);
96   }
97 
98 public:
99   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
100       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
101                         EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
116                         EmitPreInitStmt(S)) {}
117 };
118 
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Emits the loop directive's pre-init declarations. The loop counters are
  /// temporarily remapped to fresh temporaries while the pre-inits are
  /// emitted, and the mapping is restored afterwards so the real counter
  /// variables are untouched.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    for (auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      // Give each counter a scratch location for the duration of pre-init
      // emission.
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    (void)PreCondVars.apply(CGF);
    if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    // Undo the counter remapping before the loop body is emitted.
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
143 
/// Lexical scope for simd-like OpenMP constructs: emits clause pre-inits,
/// use_device_ptr captured-expression declarations, private-clause copies
/// (for non-simd directives), the taskgroup reduction descriptor if present,
/// and privatizes the captures of all nested CapturedStmt levels.
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  /// Privatization scope remapping captured variables to their addresses in
  /// the current function.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the enclosing lambda, captured
  /// statement, or a block that explicitly captures it.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // OMPCaptureNoInitAttr: storage and cleanups only, no
              // initializer emission.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        // use_device_ptr list items backed by captured-expression decls must
        // be materialized here.
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      // Emit the helper variable holding the taskgroup's task_reduction
      // descriptor, if any.
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Walk every level of nested CapturedStmts and privatize each captured
    // variable via a synthesized DeclRefExpr.
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress();
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
208 
209 } // namespace
210 
211 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
212                                          const OMPExecutableDirective &S,
213                                          const RegionCodeGenTy &CodeGen);
214 
/// Emit an l-value for \p E, rebuilding a variable reference through the
/// enclosing lambda/captured-statement/block capture machinery when the
/// referenced variable is captured; otherwise emit the expression directly.
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      // Captured if referenced through a lambda capture field, the current
      // CapturedStmt, or an enclosing block.
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      // Synthesize a fresh reference so EmitLValue resolves the capture.
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}
230 
/// Return the size in bytes of \p Ty as an llvm::Value. For VLAs the
/// statically-known element size is multiplied by the run-time extents of
/// each variable dimension.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    // Peel off each variably-sized dimension, accumulating the product of
    // the run-time element counts; Ty ends up as the innermost non-VLA type.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      auto VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    // Total size = (product of VLA extents) * sizeof(element).
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}
251 
/// Collect, in capture order, the values to pass to the outlined function
/// for the given captured statement:
///  - captured VLA dimensions are passed as the cached size values,
///  - 'this' is passed by value,
///  - by-copy captures are loaded (non-pointer values are funneled through a
///    uintptr temporary, since the runtime traffics only in pointers),
///  - by-reference captures are passed as the variable's address.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  // Walk the capture inits, record fields, and captures in lockstep.
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously computed VLA dimension size.
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // View the uintptr temporary through a pointer to the field's type.
        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}
297 
/// Reinterpret the uintptr-typed storage described by \p AddrLV as a value of
/// \p DstType and return its address. When \p isReferenceType is true, a
/// temporary named "<Name>.ref" is created to hold that address, and the
/// address of the temporary is returned instead (i.e. the address of the
/// reference rather than of the referenced value).
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  // Cast the uintptr storage pointer to a pointer to the destination type.
  auto *CastedPtr = CGF.EmitScalarConversion(AddrLV.getAddress().getPointer(),
                                             Ctx.getUIntPtrType(),
                                             Ctx.getPointerType(DstType), Loc);
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the reference of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}
323 
324 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
325   if (T->isLValueReferenceType()) {
326     return C.getLValueReferenceType(
327         getCanonicalParamType(C, T.getNonReferenceType()),
328         /*SpelledAsLValue=*/false);
329   }
330   if (T->isPointerType())
331     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
332   if (auto *A = T->getAsArrayTypeUnsafe()) {
333     if (auto *VLA = dyn_cast<VariableArrayType>(A))
334       return getCanonicalParamType(C, VLA->getElementType());
335     else if (!A->isVariablyModifiedType())
336       return C.getCanonicalType(T);
337   }
338   return C.getCanonicalParamType(T);
339 }
340 
namespace {
  /// Contains required data for proper outlined function codegen.
  struct FunctionOptions {
    /// Captured statement for which the function is generated.
    const CapturedStmt *S = nullptr;
    /// true if cast to/from  UIntPtr is required for variables captured by
    /// value.
    const bool UIntPtrCastRequired = true;
    /// true if only casted arguments must be registered as local args or VLA
    /// sizes.
    const bool RegisterCastedArgsOnly = false;
    /// Name of the generated function.
    const StringRef FunctionName;
    /// Note: RegisterCastedArgsOnly only takes effect when UIntPtrCastRequired
    /// is also true; otherwise it is forced to false by the initializer below.
    explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                             bool RegisterCastedArgsOnly,
                             StringRef FunctionName)
        : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
          RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
          FunctionName(FunctionName) {}
  };
}
362 
/// Build the declaration and prologue of the outlined function for the
/// captured statement in \p FO, and start emitting it in \p CGF.
///
/// On return:
///  - \p Args holds the logical parameter list (captures plus the captured
///    decl's leading/trailing parameters),
///  - \p LocalAddrs maps each argument to the captured variable (or nullptr
///    for 'this') and its local address,
///  - \p VLASizes maps VLA-size arguments to their size expression and the
///    loaded size value,
///  - \p CXXThisValue is set when 'this' is captured.
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  // Copy the captured decl's parameters preceding the context parameter into
  // both lists.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    // With original (non-uintptr) types, create a synthetic function decl to
    // parent ParmVarDecls so debug info can describe the parameters.
    FunctionProtoType::ExtProtoInfo EPI;
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
        SourceLocation(), DeclarationName(), Ctx.VoidTy,
        Ctx.getTrivialTypeSourceInfo(
            Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
        SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  // Synthesize one parameter per capture-record field.
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    // Pick a parameter name: the captured variable's, "this", or "vla".
    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      // Real ParmVarDecl so the parameter shows up in debug info.
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getLocStart() : FD->getLocStart(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  // Append the captured decl's parameters following the context parameter.
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  // Second pass over the captures: register local addresses and VLA sizes.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // Recover the VLA dimension size from the (possibly uintptr-cast)
      // argument and record it for VLASizeMap.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      auto *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      // Capture by reference: drill through the reference/pointer argument to
      // the variable's actual address.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      // Recover the value from its uintptr representation when required.
      LocalAddrs.insert(
          {Args[Cnt],
           {Var, FO.UIntPtrCastRequired
                     ? castValueFromUintptr(CGF, I->getLocation(),
                                            FD->getType(), Args[Cnt]->getName(),
                                            ArgLVal, VarTy->isReferenceType())
                     : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
548 
/// Generate the outlined function for the given captured statement. When
/// debug info is enabled, a "_debug__" function with the original parameter
/// types is emitted first, and a wrapper with uintptr-cast parameters (the
/// name the runtime expects) is emitted that forwards to it; otherwise the
/// single uintptr-based function is returned directly.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  // A wrapper is needed only when parameters keep their original types for
  // debug info, which requires at least limited debug info to be enabled.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  // Register the local addresses of the captured variables ('this' entries,
  // with a null VarDecl, are skipped).
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  // Make the VLA dimension sizes available to expression emission.
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Emit the uintptr-based wrapper under the runtime-visible helper name; it
  // only registers the casted arguments and forwards everything to F.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  // Reload each wrapper argument in the form the debug function expects.
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
624 
625 //===----------------------------------------------------------------------===//
626 //                              OpenMP Directive Emission
627 //===----------------------------------------------------------------------===//
/// Emit an element-by-element copy loop from \p SrcAddr to \p DestAddr for an
/// array of type \p OriginalType, invoking \p CopyGen with the per-element
/// (dest, src) addresses to emit the actual copy for each element.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI =
    Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
    Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
    Address(DestElementPHI,
            DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back-edges: the incremented pointers feed the PHIs on the next iteration.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
688 
/// Emit a copy of a variable of type \p OriginalType from \p SrcAddr to
/// \p DestAddr using the copy expression \p Copy, with the pseudo variables
/// \p SrcVD/\p DestVD remapped to the corresponding addresses. Arrays with a
/// simple assignment copy are lowered to an aggregate assign; arrays with
/// non-trivial copies are copied element by element.
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
728 
/// Emit the private copies for the 'firstprivate' clauses of directive \p D
/// and register them in \p PrivateScope. Returns true if at least one of the
/// emitted firstprivate variables also appears in a 'lastprivate' clause, so
/// the caller must keep the lastprivate copy-back machinery.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect canonical decls of all lastprivate variables so variables that
  // are both firstprivate and lastprivate can be detected below.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // If the variable is captured by value (non-reference field) and is not
      // also lastprivate, no separate copy is emitted here; the capture
      // itself serves as the firstprivate copy.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Emit each variable only once even if it appears in multiple clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Build a reference to the original variable to get its address in
        // the current (possibly captured) region.
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal = EmitLValue(&DRE);
        Address OriginalAddr = OriginalLVal.getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(),
                                           Type);
              EmitAggregateAssign(Dest, OriginalLVal, Type);
            } else {
              // Non-trivially-copyable elements: run the copy constructor
              // element by element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    // Remove the temporary mapping so it does not leak into
                    // later emission.
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  // True only if some copy was actually emitted for a variable that is also
  // lastprivate.
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
827 
828 void CodeGenFunction::EmitOMPPrivateClause(
829     const OMPExecutableDirective &D,
830     CodeGenFunction::OMPPrivateScope &PrivateScope) {
831   if (!HaveInsertPoint())
832     return;
833   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
834   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
835     auto IRef = C->varlist_begin();
836     for (auto IInit : C->private_copies()) {
837       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
838       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
839         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
840         bool IsRegistered =
841             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
842               // Emit private VarDecl with copy init.
843               EmitDecl(*VD);
844               return GetAddrOfLocalVar(VD);
845             });
846         assert(IsRegistered && "private var already registered as private");
847         // Silence the warning about unused variable.
848         (void)IsRegistered;
849       }
850       ++IRef;
851     }
852   }
853 }
854 
/// Emit copying of threadprivate variables listed in 'copyin' clauses of
/// \p D from the master thread's instances into the current thread's
/// instances. Returns true if any copying was emitted (the caller is then
/// responsible for the synchronizing barrier).
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      // Copy each variable only once even if listed in several clauses.
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          // Drop the local mapping so later references to VD go through the
          // threadprivate machinery rather than the captured address.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
            Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                        : CGM.GetAddrOfGlobal(VD),
                    getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data. The branch copies only when the master and
          // private addresses differ.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
920 
/// Emit initialization for 'lastprivate' clauses of \p D: registers the
/// private copies in \p PrivateScope and remembers the addresses of the
/// original variables for the final copy-back. Returns true if at least one
/// lastprivate clause is present.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // For simd directives, collect the loop control variables; their private
  // copies are not emitted here (see the IInit check below).
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // For (non-simd) taskloops only record the presence of the clause; no
    // initialization code is emitted here.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Map the pseudo destination variable to the original variable's
        // address; the lambda runs lazily at privatization time.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
979 
/// Emit the final copy-back for 'lastprivate' variables of \p D: the values
/// of the private copies are assigned back to the original variables, guarded
/// by \p IsLastIterCond when it is non-null.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // For loop directives, map each loop counter to its 'final' update
  // expression. With NoFinals the counters are instead excluded from the
  // copy-back entirely.
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      // Copy each variable back only once across all clauses.
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // For a private copy of reference type, copy from the referenced
        // object, not the reference slot itself.
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1049 
/// Emit initialization of the private reduction copies for all 'reduction'
/// clauses of \p D and register the privatized variables (base decl, LHS and
/// RHS pseudo variables) in \p PrivateScope.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Flatten the per-clause lists into parallel arrays consumed by
  // ReductionCodeGen; element i of each array describes reduction item i.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    // Let ReductionCodeGen adjust the private address (e.g. for array
    // sections) before registering it.
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      // Cast the private copy to the element type expected by the RHS pseudo
      // variable.
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1155 
1156 void CodeGenFunction::EmitOMPReductionClauseFinal(
1157     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1158   if (!HaveInsertPoint())
1159     return;
1160   llvm::SmallVector<const Expr *, 8> Privates;
1161   llvm::SmallVector<const Expr *, 8> LHSExprs;
1162   llvm::SmallVector<const Expr *, 8> RHSExprs;
1163   llvm::SmallVector<const Expr *, 8> ReductionOps;
1164   bool HasAtLeastOneReduction = false;
1165   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1166     HasAtLeastOneReduction = true;
1167     Privates.append(C->privates().begin(), C->privates().end());
1168     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1169     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1170     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1171   }
1172   if (HasAtLeastOneReduction) {
1173     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1174                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1175                       ReductionKind == OMPD_simd;
1176     bool SimpleReduction = ReductionKind == OMPD_simd;
1177     // Emit nowait reduction if nowait clause is present or directive is a
1178     // parallel directive (it always has implicit barrier).
1179     CGM.getOpenMPRuntime().emitReduction(
1180         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1181         {WithNowait, SimpleReduction, ReductionKind});
1182   }
1183 }
1184 
1185 static void emitPostUpdateForReductionClause(
1186     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1187     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1188   if (!CGF.HaveInsertPoint())
1189     return;
1190   llvm::BasicBlock *DoneBB = nullptr;
1191   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1192     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1193       if (!DoneBB) {
1194         if (auto *Cond = CondGen(CGF)) {
1195           // If the first post-update expression is found, emit conditional
1196           // block if it was requested.
1197           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1198           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1199           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1200           CGF.EmitBlock(ThenBB);
1201         }
1202       }
1203       CGF.EmitIgnoredExpr(PostUpdate);
1204     }
1205   }
1206   if (DoneBB)
1207     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1208 }
1209 
1210 namespace {
1211 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1212 /// parallel function. This is necessary for combined constructs such as
1213 /// 'distribute parallel for'
1214 typedef llvm::function_ref<void(CodeGenFunction &,
1215                                 const OMPExecutableDirective &,
1216                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1217     CodeGenBoundParametersTy;
1218 } // anonymous namespace
1219 
/// Shared emission path for 'parallel'-based directives: outlines the region,
/// emits num_threads/proc_bind/if clauses, and calls the runtime with the
/// captured variables. \p CodeGenBoundParameters appends any extra outlined
/// function arguments (e.g. distribute bounds for combined constructs).
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    // Evaluate the thread count expression and pass it to the runtime;
    // cleanups from the expression are scoped locally.
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  // Find an 'if' clause applying to 'parallel' (either unmodified or with
  // the 'parallel' name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}
1259 
/// No-op bound-parameter emitter for directives that do not need to pass
/// 'distribute' loop bounds to the outlined parallel function.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1263 
/// Emit code for the '#pragma omp parallel' directive \p S.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    // Copyin must be emitted before firstprivate so the threadprivate copies
    // are populated first; the barrier below separates the two phases.
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  // Reduction post-updates run unconditionally (no guard condition).
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
1290 
/// Emit one iteration of the body of the OpenMP loop directive \p D: update
/// the loop counters and linear variables, then emit the body statement.
/// \p LoopExit is the destination a 'break' in the body branches to.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (auto *U : C->updates())
        EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1317 
/// Emit the inner loop skeleton of an OpenMP worksharing construct:
/// condition block, body (via \p BodyGen), increment \p IncExpr plus
/// \p PostIncGen, and the back-edge. \p RequiresCleanup forces a separate
/// exit block so cleanups can run on loop exit.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1366 
/// Emit initializers for the variables of the 'linear' clauses of \p D,
/// together with their pre-calculated step values. Returns true if any
/// linear variable was emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      HasLinears = true;
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init refers to another variable: re-emit the reference in the
        // current region so captured variables resolve correctly.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
1401 
// Emits the final value updates for variables in 'linear' clauses: after the
// loop, each original variable receives the value computed by its 'finals'
// expression. If \p CondGen produces a non-null condition, the updates are
// emitted inside a conditional block guarded by it.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Temporarily map the original variable to its own address so that the
      // final expression F (which refers to the variable) stores the computed
      // value back into the original storage.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    // Emit the clause's post-update expression, if any, inside the same
    // (possibly conditional) region.
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1439 
1440 static void emitAlignedClause(CodeGenFunction &CGF,
1441                               const OMPExecutableDirective &D) {
1442   if (!CGF.HaveInsertPoint())
1443     return;
1444   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1445     unsigned ClauseAlignment = 0;
1446     if (auto AlignmentExpr = Clause->getAlignment()) {
1447       auto AlignmentCI =
1448           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1449       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1450     }
1451     for (auto E : Clause->varlists()) {
1452       unsigned Alignment = ClauseAlignment;
1453       if (Alignment == 0) {
1454         // OpenMP [2.8.1, Description]
1455         // If no optional parameter is specified, implementation-defined default
1456         // alignments for SIMD instructions on the target platforms are assumed.
1457         Alignment =
1458             CGF.getContext()
1459                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1460                     E->getType()->getPointeeType()))
1461                 .getQuantity();
1462       }
1463       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1464              "alignment is not power of 2");
1465       if (Alignment != 0) {
1466         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1467         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1468       }
1469     }
1470   }
1471 }
1472 
// Emits private copies for the loop counters of \p S and registers them in
// \p LoopScope so that references to the original counters inside the loop
// resolve to the private storage.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    // Remove the direct mapping so lookups below see only the original
    // variable's storage; the private address is re-registered via the scope.
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    // If the counter is addressable here (local, captured, or global), the
    // private counter must alias the original's address; otherwise it simply
    // uses the freshly allocated storage.
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    } else {
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
}
1504 
1505 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1506                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1507                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1508   if (!CGF.HaveInsertPoint())
1509     return;
1510   {
1511     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1512     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1513     (void)PreCondScope.Privatize();
1514     // Get initial values of real counters.
1515     for (auto I : S.inits()) {
1516       CGF.EmitIgnoredExpr(I);
1517     }
1518   }
1519   // Check that loop is executed at least one time.
1520   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1521 }
1522 
1523 void CodeGenFunction::EmitOMPLinearClause(
1524     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1525   if (!HaveInsertPoint())
1526     return;
1527   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1528   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1529     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1530     for (auto *C : LoopDirective->counters()) {
1531       SIMDLCVs.insert(
1532           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1533     }
1534   }
1535   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1536     auto CurPrivate = C->privates().begin();
1537     for (auto *E : C->varlists()) {
1538       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1539       auto *PrivateVD =
1540           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1541       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1542         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1543           // Emit private VarDecl with copy init.
1544           EmitVarDecl(*PrivateVD);
1545           return GetAddrOfLocalVar(PrivateVD);
1546         });
1547         assert(IsRegistered && "linear var already registered as private");
1548         // Silence the warning about unused variable.
1549         (void)IsRegistered;
1550       } else
1551         EmitVarDecl(*PrivateVD);
1552       ++CurPrivate;
1553     }
1554   }
1555 }
1556 
1557 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1558                                      const OMPExecutableDirective &D,
1559                                      bool IsMonotonic) {
1560   if (!CGF.HaveInsertPoint())
1561     return;
1562   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1563     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1564                                  /*ignoreResult=*/true);
1565     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1566     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1567     // In presence of finite 'safelen', it may be unsafe to mark all
1568     // the memory instructions parallel, because loop-carried
1569     // dependences of 'safelen' iterations are possible.
1570     if (!IsMonotonic)
1571       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1572   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1573     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1574                                  /*ignoreResult=*/true);
1575     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1576     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1577     // In presence of finite 'safelen', it may be unsafe to mark all
1578     // the memory instructions parallel, because loop-carried
1579     // dependences of 'safelen' iterations are possible.
1580     CGF.LoopStack.setParallel(false);
1581   }
1582 }
1583 
1584 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1585                                       bool IsMonotonic) {
1586   // Walk clauses and process safelen/lastprivate.
1587   LoopStack.setParallel(!IsMonotonic);
1588   LoopStack.setVectorizeEnable(true);
1589   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1590 }
1591 
// Emits the final updates of the loop counters after a simd loop: each
// addressable original counter receives the value computed by its 'finals'
// expression. If \p CondGen yields a condition, all updates are guarded by it.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    // Only update counters that are addressable here: locals, captured
    // variables, globals, or captured expressions.
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Resolve the address the final expression should store into: for a
      // captured expression use its underlying init, otherwise the private
      // counter's storage.
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      } else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      // Map the original counter to that address while emitting F.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1637 
// Emits the loop body of \p S followed by a debug stop point at the
// directive's location; used as the inner-loop callback for outer loops.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
1644 
1645 /// Emit a helper variable and return corresponding lvalue.
1646 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1647                                const DeclRefExpr *Helper) {
1648   auto VDecl = cast<VarDecl>(Helper->getDecl());
1649   CGF.EmitVarDecl(*VDecl);
1650   return CGF.EmitLValue(Helper);
1651 }
1652 
// Emits the full body of a simd region: precondition check, iteration
// variable setup, clause privatization, the inner loop itself, and the
// final/lastprivate/reduction epilogues.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  // For worksharing-style combined directives the lower/upper bound helper
  // variables must be emitted even in the simd path, since the loop
  // expressions reference them.
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGF.EmitOMPSimdInit(S);

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    // Privatize counters and clause variables for the duration of the loop.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    CGF.EmitOMPSimdFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(
        CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  }
  // Linear finals must run after the private scope is gone so they see the
  // original variables.
  CGF.EmitOMPLinearClauseFinal(
      S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
1739 
1740 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1741   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1742     emitOMPSimdRegion(CGF, S, Action);
1743   };
1744   OMPLexicalScope Scope(*this, S, OMPD_unknown);
1745   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1746 }
1747 
// Emits the generic outer dispatch loop shared by worksharing and distribute
// constructs: a condition block that either recomputes static bounds or calls
// the runtime's dispatch_next, the inner loop body via \p CodeGenLoop, the
// bound-advancing increment, and the runtime finalization on exit.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // Dynamic/guided/ordered schedules: ask the runtime for the next chunk;
    // the call returns false when no chunks remain.
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
  };
  // Route the finish call through the cancel stack so a 'cancel' inside the
  // region still performs the runtime finalization.
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
1852 
// Emits the outer loop of a worksharing 'for' construct whose schedule needs
// one (dynamic/guided/auto/runtime, ordered, or static-chunked): initializes
// the schedule via the runtime, then delegates to EmitOMPOuterLoop.
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    // Dispatch bounds may come from the combined distribute chunk, so they
    // are computed by the caller-supplied callback.
    auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
                                                             LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DipatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  // For 'ordered' loops, notify the runtime at the end of each iteration.
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
1958 
// No-op 'ordered' callback for constructs without ordered semantics
// (e.g. plain 'distribute'); matches the CodeGenOrderedTy signature.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
1961 
1962 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1963     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1964     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1965     const CodeGenLoopTy &CodeGenLoopContent) {
1966 
1967   auto &RT = CGM.getOpenMPRuntime();
1968 
1969   // Emit outer loop.
1970   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1971   // dynamic
1972   //
1973 
1974   const Expr *IVExpr = S.getIterationVariable();
1975   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1976   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1977 
1978   CGOpenMPRuntime::StaticRTInput StaticInit(
1979       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1980       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1981   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1982 
1983   // for combined 'distribute' and 'for' the increment expression of distribute
1984   // is store in DistInc. For 'distribute' alone, it is in Inc.
1985   Expr *IncExpr;
1986   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1987     IncExpr = S.getDistInc();
1988   else
1989     IncExpr = S.getInc();
1990 
1991   // this routine is shared by 'omp distribute parallel for' and
1992   // 'omp distribute': select the right EUB expression depending on the
1993   // directive
1994   OMPLoopArguments OuterLoopArgs;
1995   OuterLoopArgs.LB = LoopArgs.LB;
1996   OuterLoopArgs.UB = LoopArgs.UB;
1997   OuterLoopArgs.ST = LoopArgs.ST;
1998   OuterLoopArgs.IL = LoopArgs.IL;
1999   OuterLoopArgs.Chunk = LoopArgs.Chunk;
2000   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2001                           ? S.getCombinedEnsureUpperBound()
2002                           : S.getEnsureUpperBound();
2003   OuterLoopArgs.IncExpr = IncExpr;
2004   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2005                            ? S.getCombinedInit()
2006                            : S.getInit();
2007   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2008                            ? S.getCombinedCond()
2009                            : S.getCond();
2010   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2011                              ? S.getCombinedNextLowerBound()
2012                              : S.getNextLowerBound();
2013   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2014                              ? S.getCombinedNextUpperBound()
2015                              : S.getNextUpperBound();
2016 
2017   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2018                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2019                    emitEmptyOrdered);
2020 }
2021 
2022 static std::pair<LValue, LValue>
2023 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2024                                      const OMPExecutableDirective &S) {
2025   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2026   LValue LB =
2027       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2028   LValue UB =
2029       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2030 
2031   // When composing 'distribute' with 'for' (e.g. as in 'distribute
2032   // parallel for') we need to use the 'distribute'
2033   // chunk lower and upper bounds rather than the whole loop iteration
2034   // space. These are parameters to the outlined function for 'parallel'
2035   // and we copy the bounds of the previous schedule into the
2036   // the current ones.
2037   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2038   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2039   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2040       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2041   PrevLBVal = CGF.EmitScalarConversion(
2042       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2043       LS.getIterationVariable()->getType(),
2044       LS.getPrevLowerBoundVariable()->getExprLoc());
2045   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2046       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2047   PrevUBVal = CGF.EmitScalarConversion(
2048       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2049       LS.getIterationVariable()->getType(),
2050       LS.getPrevUpperBoundVariable()->getExprLoc());
2051 
2052   CGF.EmitStoreOfScalar(PrevLBVal, LB);
2053   CGF.EmitStoreOfScalar(PrevUBVal, UB);
2054 
2055   return {LB, UB};
2056 }
2057 
2058 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2059 /// we need to use the LB and UB expressions generated by the worksharing
2060 /// code generation support, whereas in non combined situations we would
2061 /// just emit 0 and the LastIteration expression
2062 /// This function is necessary due to the difference of the LB and UB
2063 /// types for the RT emission routines for 'for_static_init' and
2064 /// 'for_dispatch_init'
2065 static std::pair<llvm::Value *, llvm::Value *>
2066 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2067                                         const OMPExecutableDirective &S,
2068                                         Address LB, Address UB) {
2069   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2070   const Expr *IVExpr = LS.getIterationVariable();
2071   // when implementing a dynamic schedule for a 'for' combined with a
2072   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2073   // is not normalized as each team only executes its own assigned
2074   // distribute chunk
2075   QualType IteratorTy = IVExpr->getType();
2076   llvm::Value *LBVal =
2077       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2078   llvm::Value *UBVal =
2079       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2080   return {LBVal, UBVal};
2081 }
2082 
2083 static void emitDistributeParallelForDistributeInnerBoundParams(
2084     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2085     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2086   const auto &Dir = cast<OMPLoopDirective>(S);
2087   LValue LB =
2088       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2089   auto LBCast = CGF.Builder.CreateIntCast(
2090       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2091   CapturedVars.push_back(LBCast);
2092   LValue UB =
2093       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2094 
2095   auto UBCast = CGF.Builder.CreateIntCast(
2096       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2097   CapturedVars.push_back(UBCast);
2098 }
2099 
2100 static void
2101 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2102                                  const OMPLoopDirective &S,
2103                                  CodeGenFunction::JumpDest LoopExit) {
2104   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2105                                          PrePostActionTy &Action) {
2106     Action.Enter(CGF);
2107     bool HasCancel = false;
2108     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2109       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2110         HasCancel = D->hasCancel();
2111       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2112         HasCancel = D->hasCancel();
2113       else if (const auto *D =
2114                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2115         HasCancel = D->hasCancel();
2116     }
2117     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2118                                                      HasCancel);
2119     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2120                                emitDistributeParallelForInnerBounds,
2121                                emitDistributeParallelForDispatchBounds);
2122   };
2123 
2124   emitCommonOMPParallelDirective(
2125       CGF, S,
2126       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2127       CGInlinedWorksharingLoop,
2128       emitDistributeParallelForDistributeInnerBoundParams);
2129 }
2130 
2131 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2132     const OMPDistributeParallelForDirective &S) {
2133   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2134     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2135                               S.getDistInc());
2136   };
2137   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2138   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2139 }
2140 
2141 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2142     const OMPDistributeParallelForSimdDirective &S) {
2143   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2144     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2145                               S.getDistInc());
2146   };
2147   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2148   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2149 }
2150 
2151 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2152     const OMPDistributeSimdDirective &S) {
2153   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2154     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2155   };
2156   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2157   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2158 }
2159 
2160 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2161     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2162   // Emit SPMD target parallel for region as a standalone region.
2163   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2164     emitOMPSimdRegion(CGF, S, Action);
2165   };
2166   llvm::Function *Fn;
2167   llvm::Constant *Addr;
2168   // Emit target region as a standalone region.
2169   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2170       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2171   assert(Fn && Addr && "Target device function emission failed.");
2172 }
2173 
2174 void CodeGenFunction::EmitOMPTargetSimdDirective(
2175     const OMPTargetSimdDirective &S) {
2176   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2177     emitOMPSimdRegion(CGF, S, Action);
2178   };
2179   emitCommonOMPTargetDirective(*this, S, CodeGen);
2180 }
2181 
namespace {
  /// Bundles a schedule clause kind together with its two (optional)
  /// schedule modifiers.
  struct ScheduleKindModifiersTy {
    OpenMPScheduleClauseKind Kind;   ///< Schedule kind (static, dynamic, ...).
    OpenMPScheduleClauseModifier M1; ///< First schedule modifier, if any.
    OpenMPScheduleClauseModifier M2; ///< Second schedule modifier, if any.
    ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                            OpenMPScheduleClauseModifier M1,
                            OpenMPScheduleClauseModifier M2)
        : Kind(Kind), M1(M1), M2(M2) {}
  };
} // namespace
2193 
/// Emit an OpenMP worksharing loop ('for', 'for simd' and the worksharing
/// part of combined constructs).
///
/// \param S The loop directive being emitted.
/// \param EUB The "ensure upper bound" expression (UB = min(UB, GlobalUB)),
///        forwarded to the outer-loop emission used for dynamic/dispatch
///        schedules.
/// \param CodeGenLoopBounds Callback producing the LB/UB helper lvalues for
///        this directive (plain vs. combined-construct bounds).
/// \param CGDispatchBounds Callback producing the LB/UB values passed to the
///        dispatch-init runtime call.
/// \return true if the directive carried a 'lastprivate' clause.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Set when the lastprivate-init emission below finds a clause; returned to
  // the caller so it can decide whether a closing barrier is needed.
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    // An 'ordered' clause without a parameter requests ordered execution;
    // with a loop-count parameter it instead sets up doacross dependences.
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          // Convert the chunk expression to the iteration variable's type.
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress());
        RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Static chunked, dynamic, guided, runtime, auto or ordered schedules
        // go through the outer-loop machinery which requests chunks from the
        // runtime.
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}
2381 
2382 /// The following two functions generate expressions for the loop lower
2383 /// and upper bounds in case of static and dynamic (dispatch) schedule
2384 /// of the associated 'for' or 'distribute' loop.
2385 static std::pair<LValue, LValue>
2386 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2387   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2388   LValue LB =
2389       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2390   LValue UB =
2391       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2392   return {LB, UB};
2393 }
2394 
2395 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2396 /// consider the lower and upper bound expressions generated by the
2397 /// worksharing loop support, but we use 0 and the iteration space size as
2398 /// constants
2399 static std::pair<llvm::Value *, llvm::Value *>
2400 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2401                           Address LB, Address UB) {
2402   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2403   const Expr *IVExpr = LS.getIterationVariable();
2404   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2405   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2406   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2407   return {LBVal, UBVal};
2408 }
2409 
2410 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2411   bool HasLastprivates = false;
2412   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2413                                           PrePostActionTy &) {
2414     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2415     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2416                                                  emitForLoopBounds,
2417                                                  emitDispatchForLoopBounds);
2418   };
2419   {
2420     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2421     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2422                                                 S.hasCancel());
2423   }
2424 
2425   // Emit an implicit barrier at the end.
2426   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2427     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2428   }
2429 }
2430 
2431 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2432   bool HasLastprivates = false;
2433   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2434                                           PrePostActionTy &) {
2435     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2436                                                  emitForLoopBounds,
2437                                                  emitDispatchForLoopBounds);
2438   };
2439   {
2440     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2441     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2442   }
2443 
2444   // Emit an implicit barrier at the end.
2445   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2446     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2447   }
2448 }
2449 
2450 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2451                                 const Twine &Name,
2452                                 llvm::Value *Init = nullptr) {
2453   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2454   if (Init)
2455     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2456   return LVal;
2457 }
2458 
/// Shared lowering for 'sections' and 'parallel sections': the section list
/// is turned into a statically-scheduled loop whose body switches on the
/// iteration variable to pick the section to execute.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *Stmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // CS is null when the construct has a single statement instead of a
  // compound of 'section' statements.
  const auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound is <number of sections> - 1 (one iteration per
    // section), or 0 for the single-statement form.
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Build synthetic AST nodes (condition and increment) referring to the
    // helper variables through opaque value mappings.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart(), true);
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single-statement form: one case handling the whole statement.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2598 
2599 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2600   {
2601     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2602     EmitSections(S);
2603   }
2604   // Emit an implicit barrier at the end.
2605   if (!S.getSingleClause<OMPNowaitClause>()) {
2606     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2607                                            OMPD_sections);
2608   }
2609 }
2610 
2611 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2612   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2613     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2614   };
2615   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2616   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2617                                               S.hasCancel());
2618 }
2619 
2620 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2621   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2622   llvm::SmallVector<const Expr *, 8> DestExprs;
2623   llvm::SmallVector<const Expr *, 8> SrcExprs;
2624   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2625   // Check if there are any 'copyprivate' clauses associated with this
2626   // 'single' construct.
2627   // Build a list of copyprivate variables along with helper expressions
2628   // (<source>, <destination>, <destination>=<source> expressions)
2629   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2630     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2631     DestExprs.append(C->destination_exprs().begin(),
2632                      C->destination_exprs().end());
2633     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2634     AssignmentOps.append(C->assignment_ops().begin(),
2635                          C->assignment_ops().end());
2636   }
2637   // Emit code for 'single' region along with 'copyprivate' clauses
2638   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2639     Action.Enter(CGF);
2640     OMPPrivateScope SingleScope(CGF);
2641     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2642     CGF.EmitOMPPrivateClause(S, SingleScope);
2643     (void)SingleScope.Privatize();
2644     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2645   };
2646   {
2647     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2648     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2649                                             CopyprivateVars, DestExprs,
2650                                             SrcExprs, AssignmentOps);
2651   }
2652   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2653   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2654   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2655     CGM.getOpenMPRuntime().emitBarrierCall(
2656         *this, S.getLocStart(),
2657         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2658   }
2659 }
2660 
2661 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2662   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2663     Action.Enter(CGF);
2664     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2665   };
2666   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2667   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2668 }
2669 
2670 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2671   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2672     Action.Enter(CGF);
2673     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2674   };
2675   Expr *Hint = nullptr;
2676   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2677     Hint = HintClause->getHint();
2678   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2679   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2680                                             S.getDirectiveName().getAsString(),
2681                                             CodeGen, S.getLocStart(), Hint);
2682 }
2683 
2684 void CodeGenFunction::EmitOMPParallelForDirective(
2685     const OMPParallelForDirective &S) {
2686   // Emit directive as a combined directive that consists of two implicit
2687   // directives: 'parallel' with 'for' directive.
2688   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2689     Action.Enter(CGF);
2690     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2691     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2692                                emitDispatchForLoopBounds);
2693   };
2694   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2695                                  emitEmptyBoundParameters);
2696 }
2697 
2698 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2699     const OMPParallelForSimdDirective &S) {
2700   // Emit directive as a combined directive that consists of two implicit
2701   // directives: 'parallel' with 'for' directive.
2702   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2703     Action.Enter(CGF);
2704     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2705                                emitDispatchForLoopBounds);
2706   };
2707   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2708                                  emitEmptyBoundParameters);
2709 }
2710 
2711 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2712     const OMPParallelSectionsDirective &S) {
2713   // Emit directive as a combined directive that consists of two implicit
2714   // directives: 'parallel' with 'sections' directive.
2715   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2716     Action.Enter(CGF);
2717     CGF.EmitSections(S);
2718   };
2719   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2720                                  emitEmptyBoundParameters);
2721 }
2722 
2723 void CodeGenFunction::EmitOMPTaskBasedDirective(
2724     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
2725     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
2726     OMPTaskDataTy &Data) {
2727   // Emit outlined function for task construct.
2728   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
2729   auto *I = CS->getCapturedDecl()->param_begin();
2730   auto *PartId = std::next(I);
2731   auto *TaskT = std::next(I, 4);
2732   // Check if the task is final
2733   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2734     // If the condition constant folds and can be elided, try to avoid emitting
2735     // the condition and the dead arm of the if/else.
2736     auto *Cond = Clause->getCondition();
2737     bool CondConstant;
2738     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2739       Data.Final.setInt(CondConstant);
2740     else
2741       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2742   } else {
2743     // By default the task is not final.
2744     Data.Final.setInt(/*IntVal=*/false);
2745   }
2746   // Check if the task has 'priority' clause.
2747   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2748     auto *Prio = Clause->getPriority();
2749     Data.Priority.setInt(/*IntVal=*/true);
2750     Data.Priority.setPointer(EmitScalarConversion(
2751         EmitScalarExpr(Prio), Prio->getType(),
2752         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2753         Prio->getExprLoc()));
2754   }
2755   // The first function argument for tasks is a thread id, the second one is a
2756   // part id (0 for tied tasks, >=0 for untied task).
2757   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2758   // Get list of private variables.
2759   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2760     auto IRef = C->varlist_begin();
2761     for (auto *IInit : C->private_copies()) {
2762       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2763       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2764         Data.PrivateVars.push_back(*IRef);
2765         Data.PrivateCopies.push_back(IInit);
2766       }
2767       ++IRef;
2768     }
2769   }
2770   EmittedAsPrivate.clear();
2771   // Get list of firstprivate variables.
2772   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2773     auto IRef = C->varlist_begin();
2774     auto IElemInitRef = C->inits().begin();
2775     for (auto *IInit : C->private_copies()) {
2776       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2777       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2778         Data.FirstprivateVars.push_back(*IRef);
2779         Data.FirstprivateCopies.push_back(IInit);
2780         Data.FirstprivateInits.push_back(*IElemInitRef);
2781       }
2782       ++IRef;
2783       ++IElemInitRef;
2784     }
2785   }
2786   // Get list of lastprivate variables (for taskloops).
2787   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2788   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2789     auto IRef = C->varlist_begin();
2790     auto ID = C->destination_exprs().begin();
2791     for (auto *IInit : C->private_copies()) {
2792       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2793       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2794         Data.LastprivateVars.push_back(*IRef);
2795         Data.LastprivateCopies.push_back(IInit);
2796       }
2797       LastprivateDstsOrigs.insert(
2798           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2799            cast<DeclRefExpr>(*IRef)});
2800       ++IRef;
2801       ++ID;
2802     }
2803   }
2804   SmallVector<const Expr *, 4> LHSs;
2805   SmallVector<const Expr *, 4> RHSs;
2806   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2807     auto IPriv = C->privates().begin();
2808     auto IRed = C->reduction_ops().begin();
2809     auto ILHS = C->lhs_exprs().begin();
2810     auto IRHS = C->rhs_exprs().begin();
2811     for (const auto *Ref : C->varlists()) {
2812       Data.ReductionVars.emplace_back(Ref);
2813       Data.ReductionCopies.emplace_back(*IPriv);
2814       Data.ReductionOps.emplace_back(*IRed);
2815       LHSs.emplace_back(*ILHS);
2816       RHSs.emplace_back(*IRHS);
2817       std::advance(IPriv, 1);
2818       std::advance(IRed, 1);
2819       std::advance(ILHS, 1);
2820       std::advance(IRHS, 1);
2821     }
2822   }
2823   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2824       *this, S.getLocStart(), LHSs, RHSs, Data);
2825   // Build list of dependences.
2826   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2827     for (auto *IRef : C->varlists())
2828       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2829   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
2830                     CapturedRegion](CodeGenFunction &CGF,
2831                                     PrePostActionTy &Action) {
2832     // Set proper addresses for generated private copies.
2833     OMPPrivateScope Scope(CGF);
2834     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2835         !Data.LastprivateVars.empty()) {
2836       enum { PrivatesParam = 2, CopyFnParam = 3 };
2837       auto *CopyFn = CGF.Builder.CreateLoad(
2838           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2839       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2840           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2841       // Map privates.
2842       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2843       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2844       CallArgs.push_back(PrivatesPtr);
2845       for (auto *E : Data.PrivateVars) {
2846         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2847         Address PrivatePtr = CGF.CreateMemTemp(
2848             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2849         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2850         CallArgs.push_back(PrivatePtr.getPointer());
2851       }
2852       for (auto *E : Data.FirstprivateVars) {
2853         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2854         Address PrivatePtr =
2855             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2856                               ".firstpriv.ptr.addr");
2857         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2858         CallArgs.push_back(PrivatePtr.getPointer());
2859       }
2860       for (auto *E : Data.LastprivateVars) {
2861         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2862         Address PrivatePtr =
2863             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2864                               ".lastpriv.ptr.addr");
2865         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2866         CallArgs.push_back(PrivatePtr.getPointer());
2867       }
2868       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2869                                                           CopyFn, CallArgs);
2870       for (auto &&Pair : LastprivateDstsOrigs) {
2871         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2872         DeclRefExpr DRE(
2873             const_cast<VarDecl *>(OrigVD),
2874             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2875                 OrigVD) != nullptr,
2876             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2877         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2878           return CGF.EmitLValue(&DRE).getAddress();
2879         });
2880       }
2881       for (auto &&Pair : PrivatePtrs) {
2882         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2883                             CGF.getContext().getDeclAlign(Pair.first));
2884         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2885       }
2886     }
2887     if (Data.Reductions) {
2888       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
2889       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2890                              Data.ReductionOps);
2891       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2892           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2893       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2894         RedCG.emitSharedLValue(CGF, Cnt);
2895         RedCG.emitAggregateType(CGF, Cnt);
2896         // FIXME: This must removed once the runtime library is fixed.
2897         // Emit required threadprivate variables for
2898         // initilizer/combiner/finalizer.
2899         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2900                                                            RedCG, Cnt);
2901         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2902             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2903         Replacement =
2904             Address(CGF.EmitScalarConversion(
2905                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2906                         CGF.getContext().getPointerType(
2907                             Data.ReductionCopies[Cnt]->getType()),
2908                         Data.ReductionCopies[Cnt]->getExprLoc()),
2909                     Replacement.getAlignment());
2910         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2911         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2912                          [Replacement]() { return Replacement; });
2913       }
2914     }
2915     // Privatize all private variables except for in_reduction items.
2916     (void)Scope.Privatize();
2917     SmallVector<const Expr *, 4> InRedVars;
2918     SmallVector<const Expr *, 4> InRedPrivs;
2919     SmallVector<const Expr *, 4> InRedOps;
2920     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2921     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2922       auto IPriv = C->privates().begin();
2923       auto IRed = C->reduction_ops().begin();
2924       auto ITD = C->taskgroup_descriptors().begin();
2925       for (const auto *Ref : C->varlists()) {
2926         InRedVars.emplace_back(Ref);
2927         InRedPrivs.emplace_back(*IPriv);
2928         InRedOps.emplace_back(*IRed);
2929         TaskgroupDescriptors.emplace_back(*ITD);
2930         std::advance(IPriv, 1);
2931         std::advance(IRed, 1);
2932         std::advance(ITD, 1);
2933       }
2934     }
2935     // Privatize in_reduction items here, because taskgroup descriptors must be
2936     // privatized earlier.
2937     OMPPrivateScope InRedScope(CGF);
2938     if (!InRedVars.empty()) {
2939       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2940       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2941         RedCG.emitSharedLValue(CGF, Cnt);
2942         RedCG.emitAggregateType(CGF, Cnt);
2943         // The taskgroup descriptor variable is always implicit firstprivate and
2944         // privatized already during procoessing of the firstprivates.
2945         // FIXME: This must removed once the runtime library is fixed.
2946         // Emit required threadprivate variables for
2947         // initilizer/combiner/finalizer.
2948         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2949                                                            RedCG, Cnt);
2950         llvm::Value *ReductionsPtr =
2951             CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
2952                                  TaskgroupDescriptors[Cnt]->getExprLoc());
2953         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2954             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2955         Replacement = Address(
2956             CGF.EmitScalarConversion(
2957                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2958                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2959                 InRedPrivs[Cnt]->getExprLoc()),
2960             Replacement.getAlignment());
2961         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2962         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2963                               [Replacement]() { return Replacement; });
2964       }
2965     }
2966     (void)InRedScope.Privatize();
2967 
2968     Action.Enter(CGF);
2969     BodyGen(CGF);
2970   };
2971   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2972       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2973       Data.NumberOfParts);
2974   OMPLexicalScope Scope(*this, S);
2975   TaskGen(*this, OutlinedFn, Data);
2976 }
2977 
2978 static ImplicitParamDecl *
2979 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
2980                                   QualType Ty, CapturedDecl *CD,
2981                                   SourceLocation Loc) {
2982   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2983                                            ImplicitParamDecl::Other);
2984   auto *OrigRef = DeclRefExpr::Create(
2985       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
2986       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
2987   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2988                                               ImplicitParamDecl::Other);
2989   auto *PrivateRef = DeclRefExpr::Create(
2990       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
2991       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
2992   QualType ElemType = C.getBaseElementType(Ty);
2993   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
2994                                            ImplicitParamDecl::Other);
2995   auto *InitRef = DeclRefExpr::Create(
2996       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
2997       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
2998   PrivateVD->setInitStyle(VarDecl::CInit);
2999   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
3000                                               InitRef, /*BasePath=*/nullptr,
3001                                               VK_RValue));
3002   Data.FirstprivateVars.emplace_back(OrigRef);
3003   Data.FirstprivateCopies.emplace_back(PrivateRef);
3004   Data.FirstprivateInits.emplace_back(InitRef);
3005   return OrigVD;
3006 }
3007 
3008 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
3009     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
3010     OMPTargetDataInfo &InputInfo) {
3011   // Emit outlined function for task construct.
3012   auto CS = S.getCapturedStmt(OMPD_task);
3013   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3014   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3015   auto *I = CS->getCapturedDecl()->param_begin();
3016   auto *PartId = std::next(I);
3017   auto *TaskT = std::next(I, 4);
3018   OMPTaskDataTy Data;
3019   // The task is not final.
3020   Data.Final.setInt(/*IntVal=*/false);
3021   // Get list of firstprivate variables.
3022   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3023     auto IRef = C->varlist_begin();
3024     auto IElemInitRef = C->inits().begin();
3025     for (auto *IInit : C->private_copies()) {
3026       Data.FirstprivateVars.push_back(*IRef);
3027       Data.FirstprivateCopies.push_back(IInit);
3028       Data.FirstprivateInits.push_back(*IElemInitRef);
3029       ++IRef;
3030       ++IElemInitRef;
3031     }
3032   }
3033   OMPPrivateScope TargetScope(*this);
3034   VarDecl *BPVD = nullptr;
3035   VarDecl *PVD = nullptr;
3036   VarDecl *SVD = nullptr;
3037   if (InputInfo.NumberOfTargetItems > 0) {
3038     auto *CD = CapturedDecl::Create(
3039         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
3040     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
3041     QualType BaseAndPointersType = getContext().getConstantArrayType(
3042         getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
3043         /*IndexTypeQuals=*/0);
3044     BPVD = createImplicitFirstprivateForType(
3045         getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
3046     PVD = createImplicitFirstprivateForType(
3047         getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
3048     QualType SizesType = getContext().getConstantArrayType(
3049         getContext().getSizeType(), ArrSize, ArrayType::Normal,
3050         /*IndexTypeQuals=*/0);
3051     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
3052                                             S.getLocStart());
3053     TargetScope.addPrivate(
3054         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
3055     TargetScope.addPrivate(PVD,
3056                            [&InputInfo]() { return InputInfo.PointersArray; });
3057     TargetScope.addPrivate(SVD,
3058                            [&InputInfo]() { return InputInfo.SizesArray; });
3059   }
3060   (void)TargetScope.Privatize();
3061   // Build list of dependences.
3062   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
3063     for (auto *IRef : C->varlists())
3064       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
3065   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
3066                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
3067     // Set proper addresses for generated private copies.
3068     OMPPrivateScope Scope(CGF);
3069     if (!Data.FirstprivateVars.empty()) {
3070       enum { PrivatesParam = 2, CopyFnParam = 3 };
3071       auto *CopyFn = CGF.Builder.CreateLoad(
3072           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
3073       auto *PrivatesPtr = CGF.Builder.CreateLoad(
3074           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
3075       // Map privates.
3076       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3077       llvm::SmallVector<llvm::Value *, 16> CallArgs;
3078       CallArgs.push_back(PrivatesPtr);
3079       for (auto *E : Data.FirstprivateVars) {
3080         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3081         Address PrivatePtr =
3082             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3083                               ".firstpriv.ptr.addr");
3084         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
3085         CallArgs.push_back(PrivatePtr.getPointer());
3086       }
3087       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3088                                                           CopyFn, CallArgs);
3089       for (auto &&Pair : PrivatePtrs) {
3090         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3091                             CGF.getContext().getDeclAlign(Pair.first));
3092         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
3093       }
3094     }
3095     // Privatize all private variables except for in_reduction items.
3096     (void)Scope.Privatize();
3097     if (InputInfo.NumberOfTargetItems > 0) {
3098       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
3099           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
3100       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
3101           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
3102       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
3103           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
3104     }
3105 
3106     Action.Enter(CGF);
3107     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
3108     BodyGen(CGF);
3109   };
3110   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
3111       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
3112       Data.NumberOfParts);
3113   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
3114   IntegerLiteral IfCond(getContext(), TrueOrFalse,
3115                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3116                         SourceLocation());
3117 
3118   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn,
3119                                       SharedsTy, CapturedStruct, &IfCond, Data);
3120 }
3121 
3122 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
3123   // Emit outlined function for task construct.
3124   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
3125   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3126   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3127   const Expr *IfCond = nullptr;
3128   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3129     if (C->getNameModifier() == OMPD_unknown ||
3130         C->getNameModifier() == OMPD_task) {
3131       IfCond = C->getCondition();
3132       break;
3133     }
3134   }
3135 
3136   OMPTaskDataTy Data;
3137   // Check if we should emit tied or untied task.
3138   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
3139   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3140     CGF.EmitStmt(CS->getCapturedStmt());
3141   };
3142   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
3143                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
3144                             const OMPTaskDataTy &Data) {
3145     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
3146                                             SharedsTy, CapturedStruct, IfCond,
3147                                             Data);
3148   };
3149   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
3150 }
3151 
// Lower '#pragma omp taskyield' to the runtime's taskyield call.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}
3156 
// Lower '#pragma omp barrier' to the runtime's barrier call.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}
3160 
// Lower '#pragma omp taskwait' to the runtime's taskwait call.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}
3164 
3165 void CodeGenFunction::EmitOMPTaskgroupDirective(
3166     const OMPTaskgroupDirective &S) {
3167   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3168     Action.Enter(CGF);
3169     if (const Expr *E = S.getReductionRef()) {
3170       SmallVector<const Expr *, 4> LHSs;
3171       SmallVector<const Expr *, 4> RHSs;
3172       OMPTaskDataTy Data;
3173       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
3174         auto IPriv = C->privates().begin();
3175         auto IRed = C->reduction_ops().begin();
3176         auto ILHS = C->lhs_exprs().begin();
3177         auto IRHS = C->rhs_exprs().begin();
3178         for (const auto *Ref : C->varlists()) {
3179           Data.ReductionVars.emplace_back(Ref);
3180           Data.ReductionCopies.emplace_back(*IPriv);
3181           Data.ReductionOps.emplace_back(*IRed);
3182           LHSs.emplace_back(*ILHS);
3183           RHSs.emplace_back(*IRHS);
3184           std::advance(IPriv, 1);
3185           std::advance(IRed, 1);
3186           std::advance(ILHS, 1);
3187           std::advance(IRHS, 1);
3188         }
3189       }
3190       llvm::Value *ReductionDesc =
3191           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
3192                                                            LHSs, RHSs, Data);
3193       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3194       CGF.EmitVarDecl(*VD);
3195       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
3196                             /*Volatile=*/false, E->getType());
3197     }
3198     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3199   };
3200   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3201   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3202 }
3203 
3204 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3205   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3206     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3207       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3208                                 FlushClause->varlist_end());
3209     }
3210     return llvm::None;
3211   }(), S.getLocStart());
3212 }
3213 
3214 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3215                                             const CodeGenLoopTy &CodeGenLoop,
3216                                             Expr *IncExpr) {
3217   // Emit the loop iteration variable.
3218   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3219   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3220   EmitVarDecl(*IVDecl);
3221 
3222   // Emit the iterations count variable.
3223   // If it is not a variable, Sema decided to calculate iterations count on each
3224   // iteration (e.g., it is foldable into a constant).
3225   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3226     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3227     // Emit calculation of the iterations count.
3228     EmitIgnoredExpr(S.getCalcLastIteration());
3229   }
3230 
3231   auto &RT = CGM.getOpenMPRuntime();
3232 
3233   bool HasLastprivateClause = false;
3234   // Check pre-condition.
3235   {
3236     OMPLoopScope PreInitScope(*this, S);
3237     // Skip the entire loop if we don't meet the precondition.
3238     // If the condition constant folds and can be elided, avoid emitting the
3239     // whole loop.
3240     bool CondConstant;
3241     llvm::BasicBlock *ContBlock = nullptr;
3242     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3243       if (!CondConstant)
3244         return;
3245     } else {
3246       auto *ThenBlock = createBasicBlock("omp.precond.then");
3247       ContBlock = createBasicBlock("omp.precond.end");
3248       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3249                   getProfileCount(&S));
3250       EmitBlock(ThenBlock);
3251       incrementProfileCounter(&S);
3252     }
3253 
3254     emitAlignedClause(*this, S);
3255     // Emit 'then' code.
3256     {
3257       // Emit helper vars inits.
3258 
3259       LValue LB = EmitOMPHelperVar(
3260           *this, cast<DeclRefExpr>(
3261                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3262                           ? S.getCombinedLowerBoundVariable()
3263                           : S.getLowerBoundVariable())));
3264       LValue UB = EmitOMPHelperVar(
3265           *this, cast<DeclRefExpr>(
3266                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3267                           ? S.getCombinedUpperBoundVariable()
3268                           : S.getUpperBoundVariable())));
3269       LValue ST =
3270           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3271       LValue IL =
3272           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3273 
3274       OMPPrivateScope LoopScope(*this);
3275       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3276         // Emit implicit barrier to synchronize threads and avoid data races
3277         // on initialization of firstprivate variables and post-update of
3278         // lastprivate variables.
3279         CGM.getOpenMPRuntime().emitBarrierCall(
3280             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3281             /*ForceSimpleCall=*/true);
3282       }
3283       EmitOMPPrivateClause(S, LoopScope);
3284       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3285           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3286           !isOpenMPTeamsDirective(S.getDirectiveKind()))
3287         EmitOMPReductionClauseInit(S, LoopScope);
3288       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3289       EmitOMPPrivateLoopCounters(S, LoopScope);
3290       (void)LoopScope.Privatize();
3291 
3292       // Detect the distribute schedule kind and chunk.
3293       llvm::Value *Chunk = nullptr;
3294       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3295       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3296         ScheduleKind = C->getDistScheduleKind();
3297         if (const auto *Ch = C->getChunkSize()) {
3298           Chunk = EmitScalarExpr(Ch);
3299           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3300                                        S.getIterationVariable()->getType(),
3301                                        S.getLocStart());
3302         }
3303       }
3304       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3305       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3306 
3307       // OpenMP [2.10.8, distribute Construct, Description]
3308       // If dist_schedule is specified, kind must be static. If specified,
3309       // iterations are divided into chunks of size chunk_size, chunks are
3310       // assigned to the teams of the league in a round-robin fashion in the
3311       // order of the team number. When no chunk_size is specified, the
3312       // iteration space is divided into chunks that are approximately equal
3313       // in size, and at most one chunk is distributed to each team of the
3314       // league. The size of the chunks is unspecified in this case.
3315       if (RT.isStaticNonchunked(ScheduleKind,
3316                                 /* Chunked */ Chunk != nullptr)) {
3317         if (isOpenMPSimdDirective(S.getDirectiveKind()))
3318           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
3319         CGOpenMPRuntime::StaticRTInput StaticInit(
3320             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3321             LB.getAddress(), UB.getAddress(), ST.getAddress());
3322         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3323                                     StaticInit);
3324         auto LoopExit =
3325             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3326         // UB = min(UB, GlobalUB);
3327         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3328                             ? S.getCombinedEnsureUpperBound()
3329                             : S.getEnsureUpperBound());
3330         // IV = LB;
3331         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3332                             ? S.getCombinedInit()
3333                             : S.getInit());
3334 
3335         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3336                          ? S.getCombinedCond()
3337                          : S.getCond();
3338 
3339         // for distribute alone,  codegen
3340         // while (idx <= UB) { BODY; ++idx; }
3341         // when combined with 'for' (e.g. as in 'distribute parallel for')
3342         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3343         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3344                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3345                            CodeGenLoop(CGF, S, LoopExit);
3346                          },
3347                          [](CodeGenFunction &) {});
3348         EmitBlock(LoopExit.getBlock());
3349         // Tell the runtime we are done.
3350         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3351       } else {
3352         // Emit the outer loop, which requests its work chunk [LB..UB] from
3353         // runtime and runs the inner loop to process it.
3354         const OMPLoopArguments LoopArguments = {
3355             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3356             Chunk};
3357         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3358                                    CodeGenLoop);
3359       }
3360       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3361         EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3362           return CGF.Builder.CreateIsNotNull(
3363               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3364         });
3365       }
3366       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3367           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3368           !isOpenMPTeamsDirective(S.getDirectiveKind())) {
3369         OpenMPDirectiveKind ReductionKind = OMPD_unknown;
3370         if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
3371             isOpenMPSimdDirective(S.getDirectiveKind())) {
3372           ReductionKind = OMPD_parallel_for_simd;
3373         } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3374           ReductionKind = OMPD_parallel_for;
3375         } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3376           ReductionKind = OMPD_simd;
3377         } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
3378                    S.hasClausesOfKind<OMPReductionClause>()) {
3379           llvm_unreachable(
3380               "No reduction clauses is allowed in distribute directive.");
3381         }
3382         EmitOMPReductionClauseFinal(S, ReductionKind);
3383         // Emit post-update of the reduction variables if IsLastIter != 0.
3384         emitPostUpdateForReductionClause(
3385             *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3386               return CGF.Builder.CreateIsNotNull(
3387                   CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3388             });
3389       }
3390       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3391       if (HasLastprivateClause) {
3392         EmitOMPLastprivateClauseFinal(
3393             S, /*NoFinals=*/false,
3394             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
3395       }
3396     }
3397 
3398     // We're now done with the loop, so jump to the continuation block.
3399     if (ContBlock) {
3400       EmitBranch(ContBlock);
3401       EmitBlock(ContBlock, true);
3402     }
3403   }
3404 }
3405 
3406 void CodeGenFunction::EmitOMPDistributeDirective(
3407     const OMPDistributeDirective &S) {
3408   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3409 
3410     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3411   };
3412   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3413   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3414 }
3415 
3416 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3417                                                    const CapturedStmt *S) {
3418   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3419   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3420   CGF.CapturedStmtInfo = &CapStmtInfo;
3421   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3422   Fn->setDoesNotRecurse();
3423   return Fn;
3424 }
3425 
3426 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3427   if (S.hasClausesOfKind<OMPDependClause>()) {
3428     assert(!S.getAssociatedStmt() &&
3429            "No associated statement must be in ordered depend construct.");
3430     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3431       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3432     return;
3433   }
3434   auto *C = S.getSingleClause<OMPSIMDClause>();
3435   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3436                                  PrePostActionTy &Action) {
3437     const CapturedStmt *CS = S.getInnermostCapturedStmt();
3438     if (C) {
3439       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3440       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3441       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3442       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3443                                                       OutlinedFn, CapturedVars);
3444     } else {
3445       Action.Enter(CGF);
3446       CGF.EmitStmt(CS->getCapturedStmt());
3447     }
3448   };
3449   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3450   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3451 }
3452 
3453 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3454                                          QualType SrcType, QualType DestType,
3455                                          SourceLocation Loc) {
3456   assert(CGF.hasScalarEvaluationKind(DestType) &&
3457          "DestType must have scalar evaluation kind.");
3458   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3459   return Val.isScalar()
3460              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3461                                         Loc)
3462              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3463                                                  DestType, Loc);
3464 }
3465 
3466 static CodeGenFunction::ComplexPairTy
3467 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3468                       QualType DestType, SourceLocation Loc) {
3469   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3470          "DestType must have complex evaluation kind.");
3471   CodeGenFunction::ComplexPairTy ComplexVal;
3472   if (Val.isScalar()) {
3473     // Convert the input element to the element type of the complex.
3474     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3475     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3476                                               DestElementType, Loc);
3477     ComplexVal = CodeGenFunction::ComplexPairTy(
3478         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3479   } else {
3480     assert(Val.isComplex() && "Must be a scalar or complex.");
3481     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3482     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3483     ComplexVal.first = CGF.EmitScalarConversion(
3484         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3485     ComplexVal.second = CGF.EmitScalarConversion(
3486         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3487   }
3488   return ComplexVal;
3489 }
3490 
3491 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3492                                   LValue LVal, RValue RVal) {
3493   if (LVal.isGlobalReg()) {
3494     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3495   } else {
3496     CGF.EmitAtomicStore(RVal, LVal,
3497                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3498                                  : llvm::AtomicOrdering::Monotonic,
3499                         LVal.isVolatile(), /*IsInit=*/false);
3500   }
3501 }
3502 
3503 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3504                                          QualType RValTy, SourceLocation Loc) {
3505   switch (getEvaluationKind(LVal.getType())) {
3506   case TEK_Scalar:
3507     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3508                                *this, RVal, RValTy, LVal.getType(), Loc)),
3509                            LVal);
3510     break;
3511   case TEK_Complex:
3512     EmitStoreOfComplex(
3513         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3514         /*isInit=*/false);
3515     break;
3516   case TEK_Aggregate:
3517     llvm_unreachable("Must be a scalar or complex.");
3518   }
3519 }
3520 
3521 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3522                                   const Expr *X, const Expr *V,
3523                                   SourceLocation Loc) {
3524   // v = x;
3525   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3526   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3527   LValue XLValue = CGF.EmitLValue(X);
3528   LValue VLValue = CGF.EmitLValue(V);
3529   RValue Res = XLValue.isGlobalReg()
3530                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3531                    : CGF.EmitAtomicLoad(
3532                          XLValue, Loc,
3533                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3534                                   : llvm::AtomicOrdering::Monotonic,
3535                          XLValue.isVolatile());
3536   // OpenMP, 2.12.6, atomic Construct
3537   // Any atomic construct with a seq_cst clause forces the atomically
3538   // performed operation to include an implicit flush operation without a
3539   // list.
3540   if (IsSeqCst)
3541     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3542   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3543 }
3544 
3545 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3546                                    const Expr *X, const Expr *E,
3547                                    SourceLocation Loc) {
3548   // x = expr;
3549   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3550   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3551   // OpenMP, 2.12.6, atomic Construct
3552   // Any atomic construct with a seq_cst clause forces the atomically
3553   // performed operation to include an implicit flush operation without a
3554   // list.
3555   if (IsSeqCst)
3556     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3557 }
3558 
/// Try to lower an atomic update of \p X by \p Update with operator \p BO to
/// a single 'atomicrmw' instruction.
/// \returns {true, old value of 'x'} when an atomicrmw was emitted, or
/// {false, null} when the operation cannot be expressed as atomicrmw and the
/// caller must fall back to a compare-and-swap loop.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Map the binary operator onto the corresponding atomicrmw operation.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // 'expr - x' cannot be expressed with atomicrmw sub (which computes
    // 'x - expr'); require 'x' on the left of the subtraction.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  // 'x = x < e ? x : e' style updates map to min/max; the direction flips
  // with the operand order, and signedness selects the (U)Min/(U)Max forms.
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    // Plain assignment becomes an atomic exchange.
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // Operations with no atomicrmw counterpart: fall back to CAS.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // Operators that cannot appear in an atomic update expression at all.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  // Widen/truncate a constant update operand to the storage type of 'x'.
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
3651 
3652 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3653     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3654     llvm::AtomicOrdering AO, SourceLocation Loc,
3655     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3656   // Update expressions are allowed to have the following forms:
3657   // x binop= expr; -> xrval + expr;
3658   // x++, ++x -> xrval + 1;
3659   // x--, --x -> xrval - 1;
3660   // x = x binop expr; -> xrval binop expr
3661   // x = expr Op x; - > expr binop xrval;
3662   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3663   if (!Res.first) {
3664     if (X.isGlobalReg()) {
3665       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3666       // 'xrval'.
3667       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3668     } else {
3669       // Perform compare-and-swap procedure.
3670       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3671     }
3672   }
3673   return Res;
3674 }
3675 
/// Emit 'omp atomic update': atomically update 'x' using the update
/// expression \p UE, which must be a binary operator over opaque placeholders
/// for 'x' and 'expr'.
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  // Evaluate the 'x' lvalue and 'expr' up front, before the atomic sequence.
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // Both operands of the update expression are OpaqueValueExprs;
  // IsXLHSInRHSPart tells which one stands for the loaded value of 'x' and
  // which for 'expr'.
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Generator that re-emits the update expression with the opaque operands
  // bound to the freshly loaded value of 'x' and the pre-computed 'expr'.
  // Used by the compare-and-swap fallback when atomicrmw is not possible.
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3713 
3714 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3715                             QualType SourceType, QualType ResType,
3716                             SourceLocation Loc) {
3717   switch (CGF.getEvaluationKind(ResType)) {
3718   case TEK_Scalar:
3719     return RValue::get(
3720         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3721   case TEK_Complex: {
3722     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3723     return RValue::getComplex(Res.first, Res.second);
3724   }
3725   case TEK_Aggregate:
3726     break;
3727   }
3728   llvm_unreachable("Must be a scalar or complex.");
3729 }
3730 
/// Emit 'omp atomic capture': atomically update (or overwrite) 'x' and store
/// either its old or its new value into 'v'.
/// \param IsPostfixUpdate true when 'v' receives the value of 'x' from before
///        the update (postfix 'x++' semantics), false for the updated value.
/// \param UE the update expression ('x' binop 'expr'), or null when 'x' is
///        simply overwritten with 'expr'.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value that will be stored into 'v' after the atomic operation; filled in
  // by one of the generator lambdas or the atomicrmw result handling below.
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Generator for the compare-and-swap fallback: re-emit the update
    // expression over the loaded 'x' and pre-computed 'expr', and record the
    // value 'v' must observe (old 'x' for postfix, the result otherwise).
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // Generator for the fallback exchange: record the old value of 'x' and
    // return 'expr' as the value to be stored.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3811 
/// Dispatch 'omp atomic' codegen on the clause that determines the atomic
/// flavor (read/write/update/capture); an absent clause means 'update'.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  // 'omp atomic' without a clause defaults to an update.
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  // All remaining clause kinds are rejected by Sema on 'omp atomic'; listing
  // them keeps the switch exhaustive so new clauses trigger a warning here.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
3881 
3882 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3883   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3884   OpenMPClauseKind Kind = OMPC_unknown;
3885   for (auto *C : S.clauses()) {
3886     // Find first clause (skip seq_cst clause, if it is first).
3887     if (C->getClauseKind() != OMPC_seq_cst) {
3888       Kind = C->getClauseKind();
3889       break;
3890     }
3891   }
3892 
3893   const auto *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
3894   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3895     enterFullExpression(EWC);
3896   }
3897   // Processing for statements under 'atomic capture'.
3898   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3899     for (const auto *C : Compound->body()) {
3900       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3901         enterFullExpression(EWC);
3902       }
3903     }
3904   }
3905 
3906   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3907                                             PrePostActionTy &) {
3908     CGF.EmitStopPoint(CS);
3909     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3910                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3911                       S.isXLHSInRHSPart(), S.getLocStart());
3912   };
3913   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3914   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3915 }
3916 
/// Common codegen for all 'target'-family directives: inline on the device,
/// or outline the region and emit the offloading call on the host.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
        });
    return;
  }

  // Outlined kernel function and its offload-table identifier, filled in by
  // emitTargetOutlinedFunction below.
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}
3982 
3983 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3984                              PrePostActionTy &Action) {
3985   Action.Enter(CGF);
3986   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3987   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3988   CGF.EmitOMPPrivateClause(S, PrivateScope);
3989   (void)PrivateScope.Privatize();
3990 
3991   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
3992 }
3993 
3994 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3995                                                   StringRef ParentName,
3996                                                   const OMPTargetDirective &S) {
3997   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3998     emitTargetRegion(CGF, S, Action);
3999   };
4000   llvm::Function *Fn;
4001   llvm::Constant *Addr;
4002   // Emit target region as a standalone region.
4003   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4004       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4005   assert(Fn && Addr && "Target device function emission failed.");
4006 }
4007 
4008 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
4009   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4010     emitTargetRegion(CGF, S, Action);
4011   };
4012   emitCommonOMPTargetDirective(*this, S, CodeGen);
4013 }
4014 
4015 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
4016                                         const OMPExecutableDirective &S,
4017                                         OpenMPDirectiveKind InnermostKind,
4018                                         const RegionCodeGenTy &CodeGen) {
4019   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
4020   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
4021       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
4022 
4023   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
4024   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
4025   if (NT || TL) {
4026     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
4027     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
4028 
4029     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
4030                                                   S.getLocStart());
4031   }
4032 
4033   OMPTeamsScope Scope(CGF, S);
4034   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4035   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4036   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
4037                                            CapturedVars);
4038 }
4039 
4040 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
4041   // Emit teams region as a standalone region.
4042   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4043     Action.Enter(CGF);
4044     OMPPrivateScope PrivateScope(CGF);
4045     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4046     CGF.EmitOMPPrivateClause(S, PrivateScope);
4047     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4048     (void)PrivateScope.Privatize();
4049     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
4050     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4051   };
4052   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4053   emitPostUpdateForReductionClause(
4054       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4055 }
4056 
4057 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4058                                   const OMPTargetTeamsDirective &S) {
4059   auto *CS = S.getCapturedStmt(OMPD_teams);
4060   Action.Enter(CGF);
4061   // Emit teams region as a standalone region.
4062   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
4063     Action.Enter(CGF);
4064     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4065     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4066     CGF.EmitOMPPrivateClause(S, PrivateScope);
4067     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4068     (void)PrivateScope.Privatize();
4069     CGF.EmitStmt(CS->getCapturedStmt());
4070     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4071   };
4072   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
4073   emitPostUpdateForReductionClause(
4074       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4075 }
4076 
4077 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
4078     CodeGenModule &CGM, StringRef ParentName,
4079     const OMPTargetTeamsDirective &S) {
4080   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4081     emitTargetTeamsRegion(CGF, Action, S);
4082   };
4083   llvm::Function *Fn;
4084   llvm::Constant *Addr;
4085   // Emit target region as a standalone region.
4086   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4087       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4088   assert(Fn && Addr && "Target device function emission failed.");
4089 }
4090 
4091 void CodeGenFunction::EmitOMPTargetTeamsDirective(
4092     const OMPTargetTeamsDirective &S) {
4093   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4094     emitTargetTeamsRegion(CGF, Action, S);
4095   };
4096   emitCommonOMPTargetDirective(*this, S, CodeGen);
4097 }
4098 
4099 static void
4100 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4101                                 const OMPTargetTeamsDistributeDirective &S) {
4102   Action.Enter(CGF);
4103   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4104     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4105   };
4106 
4107   // Emit teams region as a standalone region.
4108   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4109                                             PrePostActionTy &Action) {
4110     Action.Enter(CGF);
4111     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4112     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4113     (void)PrivateScope.Privatize();
4114     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4115                                                     CodeGenDistribute);
4116     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4117   };
4118   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
4119   emitPostUpdateForReductionClause(CGF, S,
4120                                    [](CodeGenFunction &) { return nullptr; });
4121 }
4122 
4123 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
4124     CodeGenModule &CGM, StringRef ParentName,
4125     const OMPTargetTeamsDistributeDirective &S) {
4126   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4127     emitTargetTeamsDistributeRegion(CGF, Action, S);
4128   };
4129   llvm::Function *Fn;
4130   llvm::Constant *Addr;
4131   // Emit target region as a standalone region.
4132   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4133       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4134   assert(Fn && Addr && "Target device function emission failed.");
4135 }
4136 
4137 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
4138     const OMPTargetTeamsDistributeDirective &S) {
4139   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4140     emitTargetTeamsDistributeRegion(CGF, Action, S);
4141   };
4142   emitCommonOMPTargetDirective(*this, S, CodeGen);
4143 }
4144 
4145 static void emitTargetTeamsDistributeSimdRegion(
4146     CodeGenFunction &CGF, PrePostActionTy &Action,
4147     const OMPTargetTeamsDistributeSimdDirective &S) {
4148   Action.Enter(CGF);
4149   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4150     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4151   };
4152 
4153   // Emit teams region as a standalone region.
4154   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4155                                             PrePostActionTy &Action) {
4156     Action.Enter(CGF);
4157     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4158     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4159     (void)PrivateScope.Privatize();
4160     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4161                                                     CodeGenDistribute);
4162     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4163   };
4164   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
4165   emitPostUpdateForReductionClause(CGF, S,
4166                                    [](CodeGenFunction &) { return nullptr; });
4167 }
4168 
4169 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
4170     CodeGenModule &CGM, StringRef ParentName,
4171     const OMPTargetTeamsDistributeSimdDirective &S) {
4172   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4173     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4174   };
4175   llvm::Function *Fn;
4176   llvm::Constant *Addr;
4177   // Emit target region as a standalone region.
4178   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4179       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4180   assert(Fn && Addr && "Target device function emission failed.");
4181 }
4182 
4183 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
4184     const OMPTargetTeamsDistributeSimdDirective &S) {
4185   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4186     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4187   };
4188   emitCommonOMPTargetDirective(*this, S, CodeGen);
4189 }
4190 
4191 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
4192     const OMPTeamsDistributeDirective &S) {
4193 
4194   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4195     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4196   };
4197 
4198   // Emit teams region as a standalone region.
4199   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4200                                             PrePostActionTy &Action) {
4201     Action.Enter(CGF);
4202     OMPPrivateScope PrivateScope(CGF);
4203     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4204     (void)PrivateScope.Privatize();
4205     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4206                                                     CodeGenDistribute);
4207     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4208   };
4209   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4210   emitPostUpdateForReductionClause(*this, S,
4211                                    [](CodeGenFunction &) { return nullptr; });
4212 }
4213 
4214 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
4215     const OMPTeamsDistributeSimdDirective &S) {
4216   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4217     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4218   };
4219 
4220   // Emit teams region as a standalone region.
4221   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4222                                             PrePostActionTy &Action) {
4223     Action.Enter(CGF);
4224     OMPPrivateScope PrivateScope(CGF);
4225     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4226     (void)PrivateScope.Privatize();
4227     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
4228                                                     CodeGenDistribute);
4229     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4230   };
4231   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
4232   emitPostUpdateForReductionClause(*this, S,
4233                                    [](CodeGenFunction &) { return nullptr; });
4234 }
4235 
4236 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
4237     const OMPTeamsDistributeParallelForDirective &S) {
4238   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4239     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4240                               S.getDistInc());
4241   };
4242 
4243   // Emit teams region as a standalone region.
4244   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4245                                             PrePostActionTy &Action) {
4246     Action.Enter(CGF);
4247     OMPPrivateScope PrivateScope(CGF);
4248     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4249     (void)PrivateScope.Privatize();
4250     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4251                                                     CodeGenDistribute);
4252     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4253   };
4254   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4255   emitPostUpdateForReductionClause(*this, S,
4256                                    [](CodeGenFunction &) { return nullptr; });
4257 }
4258 
4259 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
4260     const OMPTeamsDistributeParallelForSimdDirective &S) {
4261   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4262     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4263                               S.getDistInc());
4264   };
4265 
4266   // Emit teams region as a standalone region.
4267   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4268                                             PrePostActionTy &Action) {
4269     Action.Enter(CGF);
4270     OMPPrivateScope PrivateScope(CGF);
4271     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4272     (void)PrivateScope.Privatize();
4273     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4274         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4275     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4276   };
4277   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4278   emitPostUpdateForReductionClause(*this, S,
4279                                    [](CodeGenFunction &) { return nullptr; });
4280 }
4281 
4282 static void emitTargetTeamsDistributeParallelForRegion(
4283     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
4284     PrePostActionTy &Action) {
4285   Action.Enter(CGF);
4286   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4287     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4288                               S.getDistInc());
4289   };
4290 
4291   // Emit teams region as a standalone region.
4292   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4293                                                  PrePostActionTy &Action) {
4294     Action.Enter(CGF);
4295     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4296     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4297     (void)PrivateScope.Privatize();
4298     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4299         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4300     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4301   };
4302 
4303   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
4304                               CodeGenTeams);
4305   emitPostUpdateForReductionClause(CGF, S,
4306                                    [](CodeGenFunction &) { return nullptr; });
4307 }
4308 
4309 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
4310     CodeGenModule &CGM, StringRef ParentName,
4311     const OMPTargetTeamsDistributeParallelForDirective &S) {
4312   // Emit SPMD target teams distribute parallel for region as a standalone
4313   // region.
4314   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4315     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4316   };
4317   llvm::Function *Fn;
4318   llvm::Constant *Addr;
4319   // Emit target region as a standalone region.
4320   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4321       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4322   assert(Fn && Addr && "Target device function emission failed.");
4323 }
4324 
4325 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
4326     const OMPTargetTeamsDistributeParallelForDirective &S) {
4327   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4328     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4329   };
4330   emitCommonOMPTargetDirective(*this, S, CodeGen);
4331 }
4332 
4333 static void emitTargetTeamsDistributeParallelForSimdRegion(
4334     CodeGenFunction &CGF,
4335     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
4336     PrePostActionTy &Action) {
4337   Action.Enter(CGF);
4338   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4339     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4340                               S.getDistInc());
4341   };
4342 
4343   // Emit teams region as a standalone region.
4344   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4345                                                  PrePostActionTy &Action) {
4346     Action.Enter(CGF);
4347     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4348     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4349     (void)PrivateScope.Privatize();
4350     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4351         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4352     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4353   };
4354 
4355   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
4356                               CodeGenTeams);
4357   emitPostUpdateForReductionClause(CGF, S,
4358                                    [](CodeGenFunction &) { return nullptr; });
4359 }
4360 
4361 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
4362     CodeGenModule &CGM, StringRef ParentName,
4363     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4364   // Emit SPMD target teams distribute parallel for simd region as a standalone
4365   // region.
4366   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4367     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4368   };
4369   llvm::Function *Fn;
4370   llvm::Constant *Addr;
4371   // Emit target region as a standalone region.
4372   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4373       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4374   assert(Fn && Addr && "Target device function emission failed.");
4375 }
4376 
4377 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
4378     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4379   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4380     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4381   };
4382   emitCommonOMPTargetDirective(*this, S, CodeGen);
4383 }
4384 
4385 void CodeGenFunction::EmitOMPCancellationPointDirective(
4386     const OMPCancellationPointDirective &S) {
4387   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
4388                                                    S.getCancelRegion());
4389 }
4390 
4391 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
4392   const Expr *IfCond = nullptr;
4393   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4394     if (C->getNameModifier() == OMPD_unknown ||
4395         C->getNameModifier() == OMPD_cancel) {
4396       IfCond = C->getCondition();
4397       break;
4398     }
4399   }
4400   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
4401                                         S.getCancelRegion());
4402 }
4403 
4404 CodeGenFunction::JumpDest
4405 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
4406   if (Kind == OMPD_parallel || Kind == OMPD_task ||
4407       Kind == OMPD_target_parallel)
4408     return ReturnBlock;
4409   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
4410          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
4411          Kind == OMPD_distribute_parallel_for ||
4412          Kind == OMPD_target_parallel_for ||
4413          Kind == OMPD_teams_distribute_parallel_for ||
4414          Kind == OMPD_target_teams_distribute_parallel_for);
4415   return OMPCancelStack.getExitBlock();
4416 }
4417 
4418 void CodeGenFunction::EmitOMPUseDevicePtrClause(
4419     const OMPClause &NC, OMPPrivateScope &PrivateScope,
4420     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
4421   const auto &C = cast<OMPUseDevicePtrClause>(NC);
4422   auto OrigVarIt = C.varlist_begin();
4423   auto InitIt = C.inits().begin();
4424   for (auto PvtVarIt : C.private_copies()) {
4425     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
4426     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
4427     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
4428 
4429     // In order to identify the right initializer we need to match the
4430     // declaration used by the mapping logic. In some cases we may get
4431     // OMPCapturedExprDecl that refers to the original declaration.
4432     const ValueDecl *MatchingVD = OrigVD;
4433     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
4434       // OMPCapturedExprDecl are used to privative fields of the current
4435       // structure.
4436       auto *ME = cast<MemberExpr>(OED->getInit());
4437       assert(isa<CXXThisExpr>(ME->getBase()) &&
4438              "Base should be the current struct!");
4439       MatchingVD = ME->getMemberDecl();
4440     }
4441 
4442     // If we don't have information about the current list item, move on to
4443     // the next one.
4444     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
4445     if (InitAddrIt == CaptureDeviceAddrMap.end())
4446       continue;
4447 
4448     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
4449       // Initialize the temporary initialization variable with the address we
4450       // get from the runtime library. We have to cast the source address
4451       // because it is always a void *. References are materialized in the
4452       // privatization scope, so the initialization here disregards the fact
4453       // the original variable is a reference.
4454       QualType AddrQTy =
4455           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
4456       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
4457       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
4458       setAddrOfLocalVar(InitVD, InitAddr);
4459 
4460       // Emit private declaration, it will be initialized by the value we
4461       // declaration we just added to the local declarations map.
4462       EmitDecl(*PvtVD);
4463 
4464       // The initialization variables reached its purpose in the emission
4465       // of the previous declaration, so we don't need it anymore.
4466       LocalDeclMap.erase(InitVD);
4467 
4468       // Return the address of the private variable.
4469       return GetAddrOfLocalVar(PvtVD);
4470     });
4471     assert(IsRegistered && "firstprivate var already registered as private");
4472     // Silence the warning about unused variable.
4473     (void)IsRegistered;
4474 
4475     ++OrigVarIt;
4476     ++InitIt;
4477   }
4478 }
4479 
4480 // Generate the instructions for '#pragma omp target data' directive.
4481 void CodeGenFunction::EmitOMPTargetDataDirective(
4482     const OMPTargetDataDirective &S) {
4483   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
4484 
4485   // Create a pre/post action to signal the privatization of the device pointer.
4486   // This action can be replaced by the OpenMP runtime code generation to
4487   // deactivate privatization.
4488   bool PrivatizeDevicePointers = false;
4489   class DevicePointerPrivActionTy : public PrePostActionTy {
4490     bool &PrivatizeDevicePointers;
4491 
4492   public:
4493     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
4494         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
4495     void Enter(CodeGenFunction &CGF) override {
4496       PrivatizeDevicePointers = true;
4497     }
4498   };
4499   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
4500 
4501   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
4502                        CodeGenFunction &CGF, PrePostActionTy &Action) {
4503     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4504       CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4505     };
4506 
4507     // Codegen that selects whether to generate the privatization code or not.
4508     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
4509                           &InnermostCodeGen](CodeGenFunction &CGF,
4510                                              PrePostActionTy &Action) {
4511       RegionCodeGenTy RCG(InnermostCodeGen);
4512       PrivatizeDevicePointers = false;
4513 
4514       // Call the pre-action to change the status of PrivatizeDevicePointers if
4515       // needed.
4516       Action.Enter(CGF);
4517 
4518       if (PrivatizeDevicePointers) {
4519         OMPPrivateScope PrivateScope(CGF);
4520         // Emit all instances of the use_device_ptr clause.
4521         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
4522           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
4523                                         Info.CaptureDeviceAddrMap);
4524         (void)PrivateScope.Privatize();
4525         RCG(CGF);
4526       } else
4527         RCG(CGF);
4528     };
4529 
4530     // Forward the provided action to the privatization codegen.
4531     RegionCodeGenTy PrivRCG(PrivCodeGen);
4532     PrivRCG.setAction(Action);
4533 
4534     // Notwithstanding the body of the region is emitted as inlined directive,
4535     // we don't use an inline scope as changes in the references inside the
4536     // region are expected to be visible outside, so we do not privative them.
4537     OMPLexicalScope Scope(CGF, S);
4538     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
4539                                                     PrivRCG);
4540   };
4541 
4542   RegionCodeGenTy RCG(CodeGen);
4543 
4544   // If we don't have target devices, don't bother emitting the data mapping
4545   // code.
4546   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
4547     RCG(*this);
4548     return;
4549   }
4550 
4551   // Check if we have any if clause associated with the directive.
4552   const Expr *IfCond = nullptr;
4553   if (auto *C = S.getSingleClause<OMPIfClause>())
4554     IfCond = C->getCondition();
4555 
4556   // Check if we have any device clause associated with the directive.
4557   const Expr *Device = nullptr;
4558   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4559     Device = C->getDevice();
4560 
4561   // Set the action to signal privatization of device pointers.
4562   RCG.setAction(PrivAction);
4563 
4564   // Emit region code.
4565   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4566                                              Info);
4567 }
4568 
4569 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4570     const OMPTargetEnterDataDirective &S) {
4571   // If we don't have target devices, don't bother emitting the data mapping
4572   // code.
4573   if (CGM.getLangOpts().OMPTargetTriples.empty())
4574     return;
4575 
4576   // Check if we have any if clause associated with the directive.
4577   const Expr *IfCond = nullptr;
4578   if (auto *C = S.getSingleClause<OMPIfClause>())
4579     IfCond = C->getCondition();
4580 
4581   // Check if we have any device clause associated with the directive.
4582   const Expr *Device = nullptr;
4583   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4584     Device = C->getDevice();
4585 
4586   OMPLexicalScope Scope(*this, S, OMPD_task);
4587   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4588 }
4589 
4590 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4591     const OMPTargetExitDataDirective &S) {
4592   // If we don't have target devices, don't bother emitting the data mapping
4593   // code.
4594   if (CGM.getLangOpts().OMPTargetTriples.empty())
4595     return;
4596 
4597   // Check if we have any if clause associated with the directive.
4598   const Expr *IfCond = nullptr;
4599   if (auto *C = S.getSingleClause<OMPIfClause>())
4600     IfCond = C->getCondition();
4601 
4602   // Check if we have any device clause associated with the directive.
4603   const Expr *Device = nullptr;
4604   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4605     Device = C->getDevice();
4606 
4607   OMPLexicalScope Scope(*this, S, OMPD_task);
4608   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4609 }
4610 
4611 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4612                                      const OMPTargetParallelDirective &S,
4613                                      PrePostActionTy &Action) {
4614   // Get the captured statement associated with the 'parallel' region.
4615   auto *CS = S.getCapturedStmt(OMPD_parallel);
4616   Action.Enter(CGF);
4617   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
4618     Action.Enter(CGF);
4619     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4620     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4621     CGF.EmitOMPPrivateClause(S, PrivateScope);
4622     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4623     (void)PrivateScope.Privatize();
4624     // TODO: Add support for clauses.
4625     CGF.EmitStmt(CS->getCapturedStmt());
4626     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4627   };
4628   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4629                                  emitEmptyBoundParameters);
4630   emitPostUpdateForReductionClause(
4631       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4632 }
4633 
4634 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4635     CodeGenModule &CGM, StringRef ParentName,
4636     const OMPTargetParallelDirective &S) {
4637   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4638     emitTargetParallelRegion(CGF, S, Action);
4639   };
4640   llvm::Function *Fn;
4641   llvm::Constant *Addr;
4642   // Emit target region as a standalone region.
4643   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4644       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4645   assert(Fn && Addr && "Target device function emission failed.");
4646 }
4647 
4648 void CodeGenFunction::EmitOMPTargetParallelDirective(
4649     const OMPTargetParallelDirective &S) {
4650   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4651     emitTargetParallelRegion(CGF, S, Action);
4652   };
4653   emitCommonOMPTargetDirective(*this, S, CodeGen);
4654 }
4655 
4656 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4657                                         const OMPTargetParallelForDirective &S,
4658                                         PrePostActionTy &Action) {
4659   Action.Enter(CGF);
4660   // Emit directive as a combined directive that consists of two implicit
4661   // directives: 'parallel' with 'for' directive.
4662   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4663     Action.Enter(CGF);
4664     CodeGenFunction::OMPCancelStackRAII CancelRegion(
4665         CGF, OMPD_target_parallel_for, S.hasCancel());
4666     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4667                                emitDispatchForLoopBounds);
4668   };
4669   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4670                                  emitEmptyBoundParameters);
4671 }
4672 
4673 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4674     CodeGenModule &CGM, StringRef ParentName,
4675     const OMPTargetParallelForDirective &S) {
4676   // Emit SPMD target parallel for region as a standalone region.
4677   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4678     emitTargetParallelForRegion(CGF, S, Action);
4679   };
4680   llvm::Function *Fn;
4681   llvm::Constant *Addr;
4682   // Emit target region as a standalone region.
4683   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4684       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4685   assert(Fn && Addr && "Target device function emission failed.");
4686 }
4687 
4688 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4689     const OMPTargetParallelForDirective &S) {
4690   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4691     emitTargetParallelForRegion(CGF, S, Action);
4692   };
4693   emitCommonOMPTargetDirective(*this, S, CodeGen);
4694 }
4695 
4696 static void
4697 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4698                                 const OMPTargetParallelForSimdDirective &S,
4699                                 PrePostActionTy &Action) {
4700   Action.Enter(CGF);
4701   // Emit directive as a combined directive that consists of two implicit
4702   // directives: 'parallel' with 'for' directive.
4703   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4704     Action.Enter(CGF);
4705     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4706                                emitDispatchForLoopBounds);
4707   };
4708   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4709                                  emitEmptyBoundParameters);
4710 }
4711 
4712 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4713     CodeGenModule &CGM, StringRef ParentName,
4714     const OMPTargetParallelForSimdDirective &S) {
4715   // Emit SPMD target parallel for region as a standalone region.
4716   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4717     emitTargetParallelForSimdRegion(CGF, S, Action);
4718   };
4719   llvm::Function *Fn;
4720   llvm::Constant *Addr;
4721   // Emit target region as a standalone region.
4722   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4723       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4724   assert(Fn && Addr && "Target device function emission failed.");
4725 }
4726 
4727 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4728     const OMPTargetParallelForSimdDirective &S) {
4729   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4730     emitTargetParallelForSimdRegion(CGF, S, Action);
4731   };
4732   emitCommonOMPTargetDirective(*this, S, CodeGen);
4733 }
4734 
4735 /// Emit a helper variable and return corresponding lvalue.
4736 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4737                      const ImplicitParamDecl *PVD,
4738                      CodeGenFunction::OMPPrivateScope &Privates) {
4739   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4740   Privates.addPrivate(
4741       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4742 }
4743 
/// Emit code for 'taskloop'-based directives ('taskloop', 'taskloop simd').
/// The loop body is packaged as a task and handed to the runtime's taskloop
/// entry point; unless a 'nogroup' clause is present, the whole emission is
/// wrapped in an implicit taskgroup region.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Pick the 'if' condition that applies to taskloop: either an unmodified
  // 'if' clause or one carrying the 'taskloop' name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop: Schedule is a pointer/int pair where the int
  // flag distinguishes num_tasks (true) from grainsize (false) and the
  // pointer holds the evaluated clause expression.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Generator for the task body: emits the loop itself inside the outlined
  // task function, guarded by the loop precondition.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // 'taskloop simd' also needs the simd loop metadata/initialization.
    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // NOTE(review): the bound/stride/last-iter helpers are assumed to occupy
    // parameter slots 5..8 of the taskloop outlined function's captured decl
    // (the leading slots belong to the task machinery) — confirm against the
    // outlined-function signature if that layout ever changes.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the sequential loop over the chunk assigned to this task.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Generator for the runtime call: wraps emitTaskLoopCall in an inlined
  // taskloop directive region so captured expressions are set up correctly.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  // Per OpenMP, taskloop is enclosed in an implicit taskgroup unless the
  // 'nogroup' clause is present.
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getLocStart());
  }
}
4883 
/// Emit '#pragma omp taskloop' by delegating to the shared taskloop lowering.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4887 
/// Emit '#pragma omp taskloop simd' by delegating to the shared taskloop
/// lowering (which checks for a simd directive kind internally).
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4892 
4893 // Generate the instructions for '#pragma omp target update' directive.
4894 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4895     const OMPTargetUpdateDirective &S) {
4896   // If we don't have target devices, don't bother emitting the data mapping
4897   // code.
4898   if (CGM.getLangOpts().OMPTargetTriples.empty())
4899     return;
4900 
4901   // Check if we have any if clause associated with the directive.
4902   const Expr *IfCond = nullptr;
4903   if (auto *C = S.getSingleClause<OMPIfClause>())
4904     IfCond = C->getCondition();
4905 
4906   // Check if we have any device clause associated with the directive.
4907   const Expr *Device = nullptr;
4908   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4909     Device = C->getDevice();
4910 
4911   OMPLexicalScope Scope(*this, S, OMPD_task);
4912   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4913 }
4914 
4915 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
4916     const OMPExecutableDirective &D) {
4917   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
4918     return;
4919   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
4920     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
4921       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
4922     } else {
4923       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
4924         for (const auto *E : LD->counters()) {
4925           if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
4926                   cast<DeclRefExpr>(E)->getDecl())) {
4927             // Emit only those that were not explicitly referenced in clauses.
4928             if (!CGF.LocalDeclMap.count(VD))
4929               CGF.EmitVarDecl(*VD);
4930           }
4931         }
4932       }
4933       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
4934     }
4935   };
4936   OMPSimdLexicalScope Scope(*this, D);
4937   CGM.getOpenMPRuntime().emitInlinedDirective(
4938       *this,
4939       isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
4940                                                   : D.getDirectiveKind(),
4941       CodeGen);
4942 }
4943