1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/Frontend/OpenMP/OMPConstants.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/Support/AtomicOrdering.h"
32 using namespace clang;
33 using namespace CodeGen;
34 using namespace llvm::omp;
35 
/// Forward declaration so the helper can be used by OMPSimdLexicalScope below
/// (to find the declaration behind a use_device_addr list item). The
/// definition is not visible in this part of the file.
static const VarDecl *getBaseDecl(const Expr *Ref);
37 
38 namespace {
39 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
40 /// for captured expressions.
41 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
42   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
43     for (const auto *C : S.clauses()) {
44       if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
45         if (const auto *PreInit =
46                 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
47           for (const auto *I : PreInit->decls()) {
48             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
49               CGF.EmitVarDecl(cast<VarDecl>(*I));
50             } else {
51               CodeGenFunction::AutoVarEmission Emission =
52                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
53               CGF.EmitAutoVarCleanups(Emission);
54             }
55           }
56         }
57       }
58     }
59   }
60   CodeGenFunction::OMPPrivateScope InlinedShareds;
61 
62   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
63     return CGF.LambdaCaptureFields.lookup(VD) ||
64            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
65            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
66             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
67   }
68 
69 public:
70   OMPLexicalScope(
71       CodeGenFunction &CGF, const OMPExecutableDirective &S,
72       const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
73       const bool EmitPreInitStmt = true)
74       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
75         InlinedShareds(CGF) {
76     if (EmitPreInitStmt)
77       emitPreInitStmt(CGF, S);
78     if (!CapturedRegion.hasValue())
79       return;
80     assert(S.hasAssociatedStmt() &&
81            "Expected associated statement for inlined directive.");
82     const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
83     for (const auto &C : CS->captures()) {
84       if (C.capturesVariable() || C.capturesVariableByCopy()) {
85         auto *VD = C.getCapturedVar();
86         assert(VD == VD->getCanonicalDecl() &&
87                "Canonical decl must be captured.");
88         DeclRefExpr DRE(
89             CGF.getContext(), const_cast<VarDecl *>(VD),
90             isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
91                                        InlinedShareds.isGlobalVarCaptured(VD)),
92             VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
93         InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
94           return CGF.EmitLValue(&DRE).getAddress(CGF);
95         });
96       }
97     }
98     (void)InlinedShareds.Privatize();
99   }
100 };
101 
102 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
103 /// for captured expressions.
104 class OMPParallelScope final : public OMPLexicalScope {
105   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
106     OpenMPDirectiveKind Kind = S.getDirectiveKind();
107     return !(isOpenMPTargetExecutionDirective(Kind) ||
108              isOpenMPLoopBoundSharingDirective(Kind)) &&
109            isOpenMPParallelDirective(Kind);
110   }
111 
112 public:
113   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
114       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
115                         EmitPreInitStmt(S)) {}
116 };
117 
118 /// Lexical scope for OpenMP teams construct, that handles correct codegen
119 /// for captured expressions.
120 class OMPTeamsScope final : public OMPLexicalScope {
121   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
122     OpenMPDirectiveKind Kind = S.getDirectiveKind();
123     return !isOpenMPTargetExecutionDirective(Kind) &&
124            isOpenMPTeamsDirective(Kind);
125   }
126 
127 public:
128   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
129       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
130                         EmitPreInitStmt(S)) {}
131 };
132 
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
///
/// The constructor emits, in order: temporary storage for the loop counters,
/// the init/__range/__end statements of any C++ range-based for loops in the
/// collapsed loop nest, and the directive's pre-init declarations.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Emits the pre-init declarations of \p S. Loop counters and variables from
  /// private clauses are temporarily remapped while this happens; the original
  /// mappings are restored before returning.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    // Canonical declarations that already received a temporary mapping, so a
    // variable that is both a counter and a private list item is mapped once.
    llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
    // Give every loop counter a fresh memory temporary of its non-reference
    // type.
    for (const auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      EmittedAsPrivate.insert(VD->getCanonicalDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    // Mark private vars as undefs.
    for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
      for (const Expr *IRef : C->varlists()) {
        const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        // Skip variables that were already mapped above as loop counters.
        if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
          (void)PreCondVars.setVarAddr(
              CGF, OrigVD,
              Address(llvm::UndefValue::get(
                          CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
                              OrigVD->getType().getNonReferenceType()))),
                      CGF.getContext().getDeclAlign(OrigVD)));
        }
      }
    }
    // Install the temporary mappings; they are undone by restore() below.
    (void)PreCondVars.apply(CGF);
    // Emit init, __range and __end variables for C++ range loops.
    const Stmt *Body =
        S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
    // Visit one loop per collapsed level, descending into each loop's body.
    for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
      Body = OMPLoopDirective::tryToFindNextInnerLoop(
          Body, /*TryImperfectlyNestedLoops=*/true);
      if (auto *For = dyn_cast<ForStmt>(Body)) {
        // A plain for loop has nothing to emit here; just step into its body.
        Body = For->getBody();
      } else {
        assert(isa<CXXForRangeStmt>(Body) &&
               "Expected canonical for loop or range-based for loop.");
        auto *CXXFor = cast<CXXForRangeStmt>(Body);
        if (const Stmt *Init = CXXFor->getInit())
          CGF.EmitStmt(Init);
        CGF.EmitStmt(CXXFor->getRangeStmt());
        CGF.EmitStmt(CXXFor->getEndStmt());
        Body = CXXFor->getBody();
      }
    }
    // Emit the directive's own pre-init declarations, if any.
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  /// Opens the cleanup scope in \p CGF and emits the pre-init statements of
  /// loop directive \p S.
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
192 
/// Lexical scope for an OpenMP executable directive whose constructor emits
/// the clause pre-init declarations, the captured-expression declarations of
/// use_device_ptr/use_device_addr clauses, the private clause (for non-simd
/// directives) and the taskgroup reduction variable, and then maps the
/// variables captured by every nested captured statement to their addresses
/// in the current function.
/// NOTE(review): the name suggests this is used when directives are emitted
/// inline in simd-only mode -- confirm against the callers.
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  /// Address remapping for private and captured variables of the directive.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the enclosing lambda, by the current
  /// captured statement, or by the block currently being emitted.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  /// \param CGF Function being emitted.
  /// \param S Directive the scope is opened for.
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        // Clause with a pre-init statement: emit each of its declarations.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Declarations marked OMPCaptureNoInit only get their alloca
              // and cleanups emitted here.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        // use_device_ptr: emit list items that are captured-expression decls.
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        // use_device_addr: same, but the declaration is found via
        // getBaseDecl() rather than by casting the item to a DeclRefExpr.
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    // Private clauses are emitted here only for non-simd directives.
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    // A taskgroup directive may carry a reduction reference variable that has
    // to be emitted as well.
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Walk the chain of directly nested captured statements and map each
    // captured variable to its address in the current function.
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          // Temporary reference to the variable, used only to compute its
          // address.
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
264 
265 } // namespace
266 
/// Forward declaration so the helper can be referenced before its definition,
/// which is not visible in this part of the file. Judging by its name it emits
/// the code shared by OpenMP target directives -- confirm at the definition.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);
270 
271 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
272   if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
273     if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
274       OrigVD = OrigVD->getCanonicalDecl();
275       bool IsCaptured =
276           LambdaCaptureFields.lookup(OrigVD) ||
277           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
278           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
279       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
280                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
281       return EmitLValue(&DRE);
282     }
283   }
284   return EmitLValue(E);
285 }
286 
287 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
288   ASTContext &C = getContext();
289   llvm::Value *Size = nullptr;
290   auto SizeInChars = C.getTypeSizeInChars(Ty);
291   if (SizeInChars.isZero()) {
292     // getTypeSizeInChars() returns 0 for a VLA.
293     while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
294       VlaSizePair VlaSize = getVLASize(VAT);
295       Ty = VlaSize.Type;
296       Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
297                   : VlaSize.NumElts;
298     }
299     SizeInChars = C.getTypeSizeInChars(Ty);
300     if (SizeInChars.isZero())
301       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
302     return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
303   }
304   return CGM.getSize(SizeInChars);
305 }
306 
/// Appends to \p CapturedVars one value for each capture of \p S, in capture
/// order:
///  - for a captured VLA size, the value recorded in VLASizeMap for the size
///    expression;
///  - for a captured 'this', CXXThisValue;
///  - for a by-copy capture, the loaded scalar, passed through a uintptr
///    temporary first when the field type is not a pointer;
///  - for a by-reference capture, the pointer to the variable.
///
/// \param S Captured statement whose captures are evaluated.
/// \param CapturedVars Output list the values are appended to.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  // The record fields, the captures and the capture initializers are walked
  // in lockstep.
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        // Temporary of uintptr type, named "<var>.casted".
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // View the same temporary through a pointer to the field's type.
        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}
352 
353 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
354                                     QualType DstType, StringRef Name,
355                                     LValue AddrLV) {
356   ASTContext &Ctx = CGF.getContext();
357 
358   llvm::Value *CastedPtr = CGF.EmitScalarConversion(
359       AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
360       Ctx.getPointerType(DstType), Loc);
361   Address TmpAddr =
362       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
363           .getAddress(CGF);
364   return TmpAddr;
365 }
366 
367 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
368   if (T->isLValueReferenceType())
369     return C.getLValueReferenceType(
370         getCanonicalParamType(C, T.getNonReferenceType()),
371         /*SpelledAsLValue=*/false);
372   if (T->isPointerType())
373     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
374   if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
375     if (const auto *VLA = dyn_cast<VariableArrayType>(A))
376       return getCanonicalParamType(C, VLA->getElementType());
377     if (!A->isVariablyModifiedType())
378       return C.getCanonicalType(T);
379   }
380   return C.getCanonicalParamType(T);
381 }
382 
383 namespace {
384 /// Contains required data for proper outlined function codegen.
385 struct FunctionOptions {
386   /// Captured statement for which the function is generated.
387   const CapturedStmt *S = nullptr;
388   /// true if cast to/from  UIntPtr is required for variables captured by
389   /// value.
390   const bool UIntPtrCastRequired = true;
391   /// true if only casted arguments must be registered as local args or VLA
392   /// sizes.
393   const bool RegisterCastedArgsOnly = false;
394   /// Name of the generated function.
395   const StringRef FunctionName;
396   /// Location of the non-debug version of the outlined function.
397   SourceLocation Loc;
398   explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
399                            bool RegisterCastedArgsOnly, StringRef FunctionName,
400                            SourceLocation Loc)
401       : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
402         RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
403         FunctionName(FunctionName), Loc(Loc) {}
404 };
405 } // namespace
406 
/// Creates the LLVM function for the captured statement described by \p FO,
/// starts emitting it in \p CGF, and records how each captured field can be
/// reached from the function's arguments.
///
/// \param CGF Function emitter in which the new function is started.
/// \param Args Receives the parameter declarations: the captured decl's
/// parameters before the context parameter, one parameter per captured field,
/// then the captured decl's parameters after the context parameter.
/// \param LocalAddrs Receives, keyed by argument, the captured variable (null
/// for 'this') together with the address to use for it.
/// \param VLASizes Receives, keyed by argument, the VLA size expression and
/// its loaded value.
/// \param CXXThisValue Reset to null, then set to the loaded 'this' value if
/// 'this' is captured.
/// \param FO Options controlling naming, source locations and UIntPtr casting.
/// \returns The newly created function (internal linkage).
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  // TargetArgs mirrors Args, except that per-field arguments may be replaced
  // by their runtime-translated form when no UIntPtr cast is required.
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  // Without the UIntPtr cast, a function declaration is created to act as the
  // declaration context of the ParmVarDecls built below.
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  // One argument per field of the capture record; I walks the captures in
  // lockstep with the fields.
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    // Pick the argument's name: the variable's own identifier, "this", or
    // "vla" for a captured VLA size.
    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  // Append the captured decl's parameters that follow the context parameter.
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  // Second pass over the fields: Cnt indexes the per-field arguments, which
  // start at the context parameter position, and I again tracks the captures.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // Captured VLA size: load it (casting back from uintptr if needed) and
      // remember it together with its size expression.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      // Captured by reference: the argument holds a reference or pointer to
      // the variable, so load through it to get the variable's address.
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      // Captured by copy with a non-pointer type: the value lives in the
      // argument itself, viewed through a cast when it was passed as uintptr.
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
581 
/// Emits the outlined function for captured statement \p S and returns it.
///
/// When debug info is being generated with at least reduced debug info, two
/// functions are emitted: the body goes into a function whose name is the
/// helper name with the suffix "_debug__" and whose arguments are not cast
/// to UIntPtr, and a wrapper with the plain helper name and UIntPtr-cast
/// arguments forwards to it. In that case the wrapper is returned. Otherwise
/// a single function with the helper name is emitted and returned.
///
/// \param S Captured statement to outline.
/// \param Loc Location used for the function with UIntPtr-cast arguments and
/// for the call emitted in the wrapper.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  // Buffer backs the function name handed to FunctionOptions below.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  // Map each captured variable to the address computed by the prologue;
  // entries without a variable (the 'this' capture) are skipped.
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  // Make the loaded VLA sizes available under their size expressions.
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Emit the wrapper: same helper name, UIntPtr-cast arguments, and a body
  // that only forwards its arguments to F.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      // Argument registered by the prologue: load it from the recorded
      // address, using the captured variable's type when there is one.
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        // VLA size: the prologue already loaded the value.
        CallArg = EI->second.second;
      } else {
        // Any other argument is loaded from its local variable.
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
660 
661 //===----------------------------------------------------------------------===//
662 //                              OpenMP Directive Emission
663 //===----------------------------------------------------------------------===//
664 void CodeGenFunction::EmitOMPAggregateAssign(
665     Address DestAddr, Address SrcAddr, QualType OriginalType,
666     const llvm::function_ref<void(Address, Address)> CopyGen) {
667   // Perform element-by-element initialization.
668   QualType ElementTy;
669 
670   // Drill down to the base element type on both arrays.
671   const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
672   llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
673   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
674 
675   llvm::Value *SrcBegin = SrcAddr.getPointer();
676   llvm::Value *DestBegin = DestAddr.getPointer();
677   // Cast from pointer to array type to pointer to single element.
678   llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
679   // The basic structure here is a while-do loop.
680   llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
681   llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
682   llvm::Value *IsEmpty =
683       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
684   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
685 
686   // Enter the loop body, making that address the current address.
687   llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
688   EmitBlock(BodyBB);
689 
690   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
691 
692   llvm::PHINode *SrcElementPHI =
693     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
694   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695   Address SrcElementCurrent =
696       Address(SrcElementPHI,
697               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698 
699   llvm::PHINode *DestElementPHI =
700     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701   DestElementPHI->addIncoming(DestBegin, EntryBB);
702   Address DestElementCurrent =
703     Address(DestElementPHI,
704             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705 
706   // Emit copy.
707   CopyGen(DestElementCurrent, SrcElementCurrent);
708 
709   // Shift the address forward by one element.
710   llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
711       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
712   llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
713       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
714   // Check whether we've reached the end.
715   llvm::Value *Done =
716       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
717   Builder.CreateCondBr(Done, DoneBB, BodyBB);
718   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
719   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
720 
721   // Done.
722   EmitBlock(DoneBB, /*IsFinished=*/true);
723 }
724 
725 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
726                                   Address SrcAddr, const VarDecl *DestVD,
727                                   const VarDecl *SrcVD, const Expr *Copy) {
728   if (OriginalType->isArrayType()) {
729     const auto *BO = dyn_cast<BinaryOperator>(Copy);
730     if (BO && BO->getOpcode() == BO_Assign) {
731       // Perform simple memcpy for simple copying.
732       LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
733       LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
734       EmitAggregateAssign(Dest, Src, OriginalType);
735     } else {
736       // For arrays with complex element types perform element by element
737       // copying.
738       EmitOMPAggregateAssign(
739           DestAddr, SrcAddr, OriginalType,
740           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
741             // Working with the single array element, so have to remap
742             // destination and source variables to corresponding array
743             // elements.
744             CodeGenFunction::OMPPrivateScope Remap(*this);
745             Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
746             Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
747             (void)Remap.Privatize();
748             EmitIgnoredExpr(Copy);
749           });
750     }
751   } else {
752     // Remap pseudo source variable to private copy.
753     CodeGenFunction::OMPPrivateScope Remap(*this);
754     Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
755     Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
756     (void)Remap.Privatize();
757     // Emit copying of the whole variable.
758     EmitIgnoredExpr(Copy);
759   }
760 }
761 
762 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
763                                                 OMPPrivateScope &PrivateScope) {
764   if (!HaveInsertPoint())
765     return false;
766   bool DeviceConstTarget =
767       getLangOpts().OpenMPIsDevice &&
768       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
769   bool FirstprivateIsLastprivate = false;
770   llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
771   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
772     for (const auto *D : C->varlists())
773       Lastprivates.try_emplace(
774           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
775           C->getKind());
776   }
777   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
778   llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
779   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
780   // Force emission of the firstprivate copy if the directive does not emit
781   // outlined function, like omp for, omp simd, omp distribute etc.
782   bool MustEmitFirstprivateCopy =
783       CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
784   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
785     const auto *IRef = C->varlist_begin();
786     const auto *InitsRef = C->inits().begin();
787     for (const Expr *IInit : C->private_copies()) {
788       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
789       bool ThisFirstprivateIsLastprivate =
790           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
791       const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
792       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
793       if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
794           !FD->getType()->isReferenceType() &&
795           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
796         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
797         ++IRef;
798         ++InitsRef;
799         continue;
800       }
801       // Do not emit copy for firstprivate constant variables in target regions,
802       // captured by reference.
803       if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
804           FD && FD->getType()->isReferenceType() &&
805           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
806         (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
807                                                                     OrigVD);
808         ++IRef;
809         ++InitsRef;
810         continue;
811       }
812       FirstprivateIsLastprivate =
813           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
814       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
815         const auto *VDInit =
816             cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
817         bool IsRegistered;
818         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
819                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
820                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
821         LValue OriginalLVal;
822         if (!FD) {
823           // Check if the firstprivate variable is just a constant value.
824           ConstantEmission CE = tryEmitAsConstant(&DRE);
825           if (CE && !CE.isReference()) {
826             // Constant value, no need to create a copy.
827             ++IRef;
828             ++InitsRef;
829             continue;
830           }
831           if (CE && CE.isReference()) {
832             OriginalLVal = CE.getReferenceLValue(*this, &DRE);
833           } else {
834             assert(!CE && "Expected non-constant firstprivate.");
835             OriginalLVal = EmitLValue(&DRE);
836           }
837         } else {
838           OriginalLVal = EmitLValue(&DRE);
839         }
840         QualType Type = VD->getType();
841         if (Type->isArrayType()) {
842           // Emit VarDecl with copy init for arrays.
843           // Get the address of the original variable captured in current
844           // captured region.
845           IsRegistered = PrivateScope.addPrivate(
846               OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
847                 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
848                 const Expr *Init = VD->getInit();
849                 if (!isa<CXXConstructExpr>(Init) ||
850                     isTrivialInitializer(Init)) {
851                   // Perform simple memcpy.
852                   LValue Dest =
853                       MakeAddrLValue(Emission.getAllocatedAddress(), Type);
854                   EmitAggregateAssign(Dest, OriginalLVal, Type);
855                 } else {
856                   EmitOMPAggregateAssign(
857                       Emission.getAllocatedAddress(),
858                       OriginalLVal.getAddress(*this), Type,
859                       [this, VDInit, Init](Address DestElement,
860                                            Address SrcElement) {
861                         // Clean up any temporaries needed by the
862                         // initialization.
863                         RunCleanupsScope InitScope(*this);
864                         // Emit initialization for single element.
865                         setAddrOfLocalVar(VDInit, SrcElement);
866                         EmitAnyExprToMem(Init, DestElement,
867                                          Init->getType().getQualifiers(),
868                                          /*IsInitializer*/ false);
869                         LocalDeclMap.erase(VDInit);
870                       });
871                 }
872                 EmitAutoVarCleanups(Emission);
873                 return Emission.getAllocatedAddress();
874               });
875         } else {
876           Address OriginalAddr = OriginalLVal.getAddress(*this);
877           IsRegistered =
878               PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
879                                                ThisFirstprivateIsLastprivate,
880                                                OrigVD, &Lastprivates, IRef]() {
881                 // Emit private VarDecl with copy init.
882                 // Remap temp VDInit variable to the address of the original
883                 // variable (for proper handling of captured global variables).
884                 setAddrOfLocalVar(VDInit, OriginalAddr);
885                 EmitDecl(*VD);
886                 LocalDeclMap.erase(VDInit);
887                 if (ThisFirstprivateIsLastprivate &&
888                     Lastprivates[OrigVD->getCanonicalDecl()] ==
889                         OMPC_LASTPRIVATE_conditional) {
890                   // Create/init special variable for lastprivate conditionals.
891                   Address VDAddr =
892                       CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
893                           *this, OrigVD);
894                   llvm::Value *V = EmitLoadOfScalar(
895                       MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
896                                      AlignmentSource::Decl),
897                       (*IRef)->getExprLoc());
898                   EmitStoreOfScalar(V,
899                                     MakeAddrLValue(VDAddr, (*IRef)->getType(),
900                                                    AlignmentSource::Decl));
901                   LocalDeclMap.erase(VD);
902                   setAddrOfLocalVar(VD, VDAddr);
903                   return VDAddr;
904                 }
905                 return GetAddrOfLocalVar(VD);
906               });
907         }
908         assert(IsRegistered &&
909                "firstprivate var already registered as private");
910         // Silence the warning about unused variable.
911         (void)IsRegistered;
912       }
913       ++IRef;
914       ++InitsRef;
915     }
916   }
917   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
918 }
919 
920 void CodeGenFunction::EmitOMPPrivateClause(
921     const OMPExecutableDirective &D,
922     CodeGenFunction::OMPPrivateScope &PrivateScope) {
923   if (!HaveInsertPoint())
924     return;
925   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
926   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
927     auto IRef = C->varlist_begin();
928     for (const Expr *IInit : C->private_copies()) {
929       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
930       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
931         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
932         bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
933           // Emit private VarDecl with copy init.
934           EmitDecl(*VD);
935           return GetAddrOfLocalVar(VD);
936         });
937         assert(IsRegistered && "private var already registered as private");
938         // Silence the warning about unused variable.
939         (void)IsRegistered;
940       }
941       ++IRef;
942     }
943   }
944 }
945 
946 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
947   if (!HaveInsertPoint())
948     return false;
949   // threadprivate_var1 = master_threadprivate_var1;
950   // operator=(threadprivate_var2, master_threadprivate_var2);
951   // ...
952   // __kmpc_barrier(&loc, global_tid);
953   llvm::DenseSet<const VarDecl *> CopiedVars;
954   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
955   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
956     auto IRef = C->varlist_begin();
957     auto ISrcRef = C->source_exprs().begin();
958     auto IDestRef = C->destination_exprs().begin();
959     for (const Expr *AssignOp : C->assignment_ops()) {
960       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
961       QualType Type = VD->getType();
962       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
963         // Get the address of the master variable. If we are emitting code with
964         // TLS support, the address is passed from the master as field in the
965         // captured declaration.
966         Address MasterAddr = Address::invalid();
967         if (getLangOpts().OpenMPUseTLS &&
968             getContext().getTargetInfo().isTLSSupported()) {
969           assert(CapturedStmtInfo->lookup(VD) &&
970                  "Copyin threadprivates should have been captured!");
971           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
972                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
973           MasterAddr = EmitLValue(&DRE).getAddress(*this);
974           LocalDeclMap.erase(VD);
975         } else {
976           MasterAddr =
977             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
978                                         : CGM.GetAddrOfGlobal(VD),
979                     getContext().getDeclAlign(VD));
980         }
981         // Get the address of the threadprivate variable.
982         Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
983         if (CopiedVars.size() == 1) {
984           // At first check if current thread is a master thread. If it is, no
985           // need to copy data.
986           CopyBegin = createBasicBlock("copyin.not.master");
987           CopyEnd = createBasicBlock("copyin.not.master.end");
988           Builder.CreateCondBr(
989               Builder.CreateICmpNE(
990                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
991                   Builder.CreatePtrToInt(PrivateAddr.getPointer(),
992                                          CGM.IntPtrTy)),
993               CopyBegin, CopyEnd);
994           EmitBlock(CopyBegin);
995         }
996         const auto *SrcVD =
997             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
998         const auto *DestVD =
999             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1000         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1001       }
1002       ++IRef;
1003       ++ISrcRef;
1004       ++IDestRef;
1005     }
1006   }
1007   if (CopyEnd) {
1008     // Exit out of copying procedure for non-master thread.
1009     EmitBlock(CopyEnd, /*IsFinished=*/true);
1010     return true;
1011   }
1012   return false;
1013 }
1014 
1015 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1016     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1017   if (!HaveInsertPoint())
1018     return false;
1019   bool HasAtLeastOneLastprivate = false;
1020   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1021   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1022     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1023     for (const Expr *C : LoopDirective->counters()) {
1024       SIMDLCVs.insert(
1025           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1026     }
1027   }
1028   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1029   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1030     HasAtLeastOneLastprivate = true;
1031     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1032         !getLangOpts().OpenMPSimd)
1033       break;
1034     const auto *IRef = C->varlist_begin();
1035     const auto *IDestRef = C->destination_exprs().begin();
1036     for (const Expr *IInit : C->private_copies()) {
1037       // Keep the address of the original variable for future update at the end
1038       // of the loop.
1039       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1040       // Taskloops do not require additional initialization, it is done in
1041       // runtime support library.
1042       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1043         const auto *DestVD =
1044             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1045         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
1046           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1047                           /*RefersToEnclosingVariableOrCapture=*/
1048                               CapturedStmtInfo->lookup(OrigVD) != nullptr,
1049                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1050           return EmitLValue(&DRE).getAddress(*this);
1051         });
1052         // Check if the variable is also a firstprivate: in this case IInit is
1053         // not generated. Initialization of this variable will happen in codegen
1054         // for 'firstprivate' clause.
1055         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1056           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1057           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
1058                                                                OrigVD]() {
1059             if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1060               Address VDAddr =
1061                   CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
1062                                                                         OrigVD);
1063               setAddrOfLocalVar(VD, VDAddr);
1064               return VDAddr;
1065             }
1066             // Emit private VarDecl with copy init.
1067             EmitDecl(*VD);
1068             return GetAddrOfLocalVar(VD);
1069           });
1070           assert(IsRegistered &&
1071                  "lastprivate var already registered as private");
1072           (void)IsRegistered;
1073         }
1074       }
1075       ++IRef;
1076       ++IDestRef;
1077     }
1078   }
1079   return HasAtLeastOneLastprivate;
1080 }
1081 
1082 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1083     const OMPExecutableDirective &D, bool NoFinals,
1084     llvm::Value *IsLastIterCond) {
1085   if (!HaveInsertPoint())
1086     return;
1087   // Emit following code:
1088   // if (<IsLastIterCond>) {
1089   //   orig_var1 = private_orig_var1;
1090   //   ...
1091   //   orig_varn = private_orig_varn;
1092   // }
1093   llvm::BasicBlock *ThenBB = nullptr;
1094   llvm::BasicBlock *DoneBB = nullptr;
1095   if (IsLastIterCond) {
1096     // Emit implicit barrier if at least one lastprivate conditional is found
1097     // and this is not a simd mode.
1098     if (!getLangOpts().OpenMPSimd &&
1099         llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1100                      [](const OMPLastprivateClause *C) {
1101                        return C->getKind() == OMPC_LASTPRIVATE_conditional;
1102                      })) {
1103       CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1104                                              OMPD_unknown,
1105                                              /*EmitChecks=*/false,
1106                                              /*ForceSimpleCall=*/true);
1107     }
1108     ThenBB = createBasicBlock(".omp.lastprivate.then");
1109     DoneBB = createBasicBlock(".omp.lastprivate.done");
1110     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1111     EmitBlock(ThenBB);
1112   }
1113   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1114   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1115   if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1116     auto IC = LoopDirective->counters().begin();
1117     for (const Expr *F : LoopDirective->finals()) {
1118       const auto *D =
1119           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1120       if (NoFinals)
1121         AlreadyEmittedVars.insert(D);
1122       else
1123         LoopCountersAndUpdates[D] = F;
1124       ++IC;
1125     }
1126   }
1127   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1128     auto IRef = C->varlist_begin();
1129     auto ISrcRef = C->source_exprs().begin();
1130     auto IDestRef = C->destination_exprs().begin();
1131     for (const Expr *AssignOp : C->assignment_ops()) {
1132       const auto *PrivateVD =
1133           cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1134       QualType Type = PrivateVD->getType();
1135       const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1136       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1137         // If lastprivate variable is a loop control variable for loop-based
1138         // directive, update its value before copyin back to original
1139         // variable.
1140         if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1141           EmitIgnoredExpr(FinalExpr);
1142         const auto *SrcVD =
1143             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1144         const auto *DestVD =
1145             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1146         // Get the address of the private variable.
1147         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1148         if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1149           PrivateAddr =
1150               Address(Builder.CreateLoad(PrivateAddr),
1151                       CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1152         // Store the last value to the private copy in the last iteration.
1153         if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1154           CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1155               *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1156               (*IRef)->getExprLoc());
1157         // Get the address of the original variable.
1158         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1159         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1160       }
1161       ++IRef;
1162       ++ISrcRef;
1163       ++IDestRef;
1164     }
1165     if (const Expr *PostUpdate = C->getPostUpdateExpr())
1166       EmitIgnoredExpr(PostUpdate);
1167   }
1168   if (IsLastIterCond)
1169     EmitBlock(DoneBB, /*IsFinished=*/true);
1170 }
1171 
1172 void CodeGenFunction::EmitOMPReductionClauseInit(
1173     const OMPExecutableDirective &D,
1174     CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1175   if (!HaveInsertPoint())
1176     return;
1177   SmallVector<const Expr *, 4> Shareds;
1178   SmallVector<const Expr *, 4> Privates;
1179   SmallVector<const Expr *, 4> ReductionOps;
1180   SmallVector<const Expr *, 4> LHSs;
1181   SmallVector<const Expr *, 4> RHSs;
1182   OMPTaskDataTy Data;
1183   SmallVector<const Expr *, 4> TaskLHSs;
1184   SmallVector<const Expr *, 4> TaskRHSs;
1185   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1186     if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1187       continue;
1188     Shareds.append(C->varlist_begin(), C->varlist_end());
1189     Privates.append(C->privates().begin(), C->privates().end());
1190     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1191     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1192     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1193     if (C->getModifier() == OMPC_REDUCTION_task) {
1194       Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1195       Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1196       Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1197       Data.ReductionOps.append(C->reduction_ops().begin(),
1198                                C->reduction_ops().end());
1199       TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1200       TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1201     }
1202   }
1203   ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1204   unsigned Count = 0;
1205   auto *ILHS = LHSs.begin();
1206   auto *IRHS = RHSs.begin();
1207   auto *IPriv = Privates.begin();
1208   for (const Expr *IRef : Shareds) {
1209     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1210     // Emit private VarDecl with reduction init.
1211     RedCG.emitSharedOrigLValue(*this, Count);
1212     RedCG.emitAggregateType(*this, Count);
1213     AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1214     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1215                              RedCG.getSharedLValue(Count),
1216                              [&Emission](CodeGenFunction &CGF) {
1217                                CGF.EmitAutoVarInit(Emission);
1218                                return true;
1219                              });
1220     EmitAutoVarCleanups(Emission);
1221     Address BaseAddr = RedCG.adjustPrivateAddress(
1222         *this, Count, Emission.getAllocatedAddress());
1223     bool IsRegistered = PrivateScope.addPrivate(
1224         RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
1225     assert(IsRegistered && "private var already registered as private");
1226     // Silence the warning about unused variable.
1227     (void)IsRegistered;
1228 
1229     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1230     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1231     QualType Type = PrivateVD->getType();
1232     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1233     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1234       // Store the address of the original variable associated with the LHS
1235       // implicit variable.
1236       PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1237         return RedCG.getSharedLValue(Count).getAddress(*this);
1238       });
1239       PrivateScope.addPrivate(
1240           RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
1241     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1242                isa<ArraySubscriptExpr>(IRef)) {
1243       // Store the address of the original variable associated with the LHS
1244       // implicit variable.
1245       PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1246         return RedCG.getSharedLValue(Count).getAddress(*this);
1247       });
1248       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
1249         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1250                                             ConvertTypeForMem(RHSVD->getType()),
1251                                             "rhs.begin");
1252       });
1253     } else {
1254       QualType Type = PrivateVD->getType();
1255       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1256       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1257       // Store the address of the original variable associated with the LHS
1258       // implicit variable.
1259       if (IsArray) {
1260         OriginalAddr = Builder.CreateElementBitCast(
1261             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1262       }
1263       PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
1264       PrivateScope.addPrivate(
1265           RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
1266             return IsArray
1267                        ? Builder.CreateElementBitCast(
1268                              GetAddrOfLocalVar(PrivateVD),
1269                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1270                        : GetAddrOfLocalVar(PrivateVD);
1271           });
1272     }
1273     ++ILHS;
1274     ++IRHS;
1275     ++IPriv;
1276     ++Count;
1277   }
1278   if (!Data.ReductionVars.empty()) {
1279     Data.IsReductionWithTaskMod = true;
1280     Data.IsWorksharingReduction =
1281         isOpenMPWorksharingDirective(D.getDirectiveKind());
1282     llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1283         *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1284     const Expr *TaskRedRef = nullptr;
1285     switch (D.getDirectiveKind()) {
1286     case OMPD_parallel:
1287       TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1288       break;
1289     case OMPD_for:
1290       TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1291       break;
1292     case OMPD_sections:
1293       TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1294       break;
1295     case OMPD_parallel_for:
1296       TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1297       break;
1298     case OMPD_parallel_master:
1299       TaskRedRef =
1300           cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1301       break;
1302     case OMPD_parallel_sections:
1303       TaskRedRef =
1304           cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1305       break;
1306     case OMPD_target_parallel:
1307       TaskRedRef =
1308           cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1309       break;
1310     case OMPD_target_parallel_for:
1311       TaskRedRef =
1312           cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1313       break;
1314     case OMPD_distribute_parallel_for:
1315       TaskRedRef =
1316           cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1317       break;
1318     case OMPD_teams_distribute_parallel_for:
1319       TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1320                        .getTaskReductionRefExpr();
1321       break;
1322     case OMPD_target_teams_distribute_parallel_for:
1323       TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1324                        .getTaskReductionRefExpr();
1325       break;
1326     case OMPD_simd:
1327     case OMPD_for_simd:
1328     case OMPD_section:
1329     case OMPD_single:
1330     case OMPD_master:
1331     case OMPD_critical:
1332     case OMPD_parallel_for_simd:
1333     case OMPD_task:
1334     case OMPD_taskyield:
1335     case OMPD_barrier:
1336     case OMPD_taskwait:
1337     case OMPD_taskgroup:
1338     case OMPD_flush:
1339     case OMPD_depobj:
1340     case OMPD_scan:
1341     case OMPD_ordered:
1342     case OMPD_atomic:
1343     case OMPD_teams:
1344     case OMPD_target:
1345     case OMPD_cancellation_point:
1346     case OMPD_cancel:
1347     case OMPD_target_data:
1348     case OMPD_target_enter_data:
1349     case OMPD_target_exit_data:
1350     case OMPD_taskloop:
1351     case OMPD_taskloop_simd:
1352     case OMPD_master_taskloop:
1353     case OMPD_master_taskloop_simd:
1354     case OMPD_parallel_master_taskloop:
1355     case OMPD_parallel_master_taskloop_simd:
1356     case OMPD_distribute:
1357     case OMPD_target_update:
1358     case OMPD_distribute_parallel_for_simd:
1359     case OMPD_distribute_simd:
1360     case OMPD_target_parallel_for_simd:
1361     case OMPD_target_simd:
1362     case OMPD_teams_distribute:
1363     case OMPD_teams_distribute_simd:
1364     case OMPD_teams_distribute_parallel_for_simd:
1365     case OMPD_target_teams:
1366     case OMPD_target_teams_distribute:
1367     case OMPD_target_teams_distribute_parallel_for_simd:
1368     case OMPD_target_teams_distribute_simd:
1369     case OMPD_declare_target:
1370     case OMPD_end_declare_target:
1371     case OMPD_threadprivate:
1372     case OMPD_allocate:
1373     case OMPD_declare_reduction:
1374     case OMPD_declare_mapper:
1375     case OMPD_declare_simd:
1376     case OMPD_requires:
1377     case OMPD_declare_variant:
1378     case OMPD_begin_declare_variant:
1379     case OMPD_end_declare_variant:
1380     case OMPD_unknown:
1381     default:
1382       llvm_unreachable("Enexpected directive with task reductions.");
1383     }
1384 
1385     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1386     EmitVarDecl(*VD);
1387     EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1388                       /*Volatile=*/false, TaskRedRef->getType());
1389   }
1390 }
1391 
1392 void CodeGenFunction::EmitOMPReductionClauseFinal(
1393     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1394   if (!HaveInsertPoint())
1395     return;
1396   llvm::SmallVector<const Expr *, 8> Privates;
1397   llvm::SmallVector<const Expr *, 8> LHSExprs;
1398   llvm::SmallVector<const Expr *, 8> RHSExprs;
1399   llvm::SmallVector<const Expr *, 8> ReductionOps;
1400   bool HasAtLeastOneReduction = false;
1401   bool IsReductionWithTaskMod = false;
1402   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1403     // Do not emit for inscan reductions.
1404     if (C->getModifier() == OMPC_REDUCTION_inscan)
1405       continue;
1406     HasAtLeastOneReduction = true;
1407     Privates.append(C->privates().begin(), C->privates().end());
1408     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1409     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1410     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1411     IsReductionWithTaskMod =
1412         IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1413   }
1414   if (HasAtLeastOneReduction) {
1415     if (IsReductionWithTaskMod) {
1416       CGM.getOpenMPRuntime().emitTaskReductionFini(
1417           *this, D.getBeginLoc(),
1418           isOpenMPWorksharingDirective(D.getDirectiveKind()));
1419     }
1420     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1421                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1422                       ReductionKind == OMPD_simd;
1423     bool SimpleReduction = ReductionKind == OMPD_simd;
1424     // Emit nowait reduction if nowait clause is present or directive is a
1425     // parallel directive (it always has implicit barrier).
1426     CGM.getOpenMPRuntime().emitReduction(
1427         *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1428         {WithNowait, SimpleReduction, ReductionKind});
1429   }
1430 }
1431 
1432 static void emitPostUpdateForReductionClause(
1433     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1434     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1435   if (!CGF.HaveInsertPoint())
1436     return;
1437   llvm::BasicBlock *DoneBB = nullptr;
1438   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1439     if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1440       if (!DoneBB) {
1441         if (llvm::Value *Cond = CondGen(CGF)) {
1442           // If the first post-update expression is found, emit conditional
1443           // block if it was requested.
1444           llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1445           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1446           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1447           CGF.EmitBlock(ThenBB);
1448         }
1449       }
1450       CGF.EmitIgnoredExpr(PostUpdate);
1451     }
1452   }
1453   if (DoneBB)
1454     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1455 }
1456 
1457 namespace {
1458 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1459 /// parallel function. This is necessary for combined constructs such as
1460 /// 'distribute parallel for'
1461 typedef llvm::function_ref<void(CodeGenFunction &,
1462                                 const OMPExecutableDirective &,
1463                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1464     CodeGenBoundParametersTy;
1465 } // anonymous namespace
1466 
1467 static void
1468 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1469                                      const OMPExecutableDirective &S) {
1470   if (CGF.getLangOpts().OpenMP < 50)
1471     return;
1472   llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1473   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1474     for (const Expr *Ref : C->varlists()) {
1475       if (!Ref->getType()->isScalarType())
1476         continue;
1477       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1478       if (!DRE)
1479         continue;
1480       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1481       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1482     }
1483   }
1484   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1485     for (const Expr *Ref : C->varlists()) {
1486       if (!Ref->getType()->isScalarType())
1487         continue;
1488       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1489       if (!DRE)
1490         continue;
1491       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1492       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1493     }
1494   }
1495   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1496     for (const Expr *Ref : C->varlists()) {
1497       if (!Ref->getType()->isScalarType())
1498         continue;
1499       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1500       if (!DRE)
1501         continue;
1502       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1503       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1504     }
1505   }
1506   // Privates should ne analyzed since they are not captured at all.
1507   // Task reductions may be skipped - tasks are ignored.
1508   // Firstprivates do not return value but may be passed by reference - no need
1509   // to check for updated lastprivate conditional.
1510   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1511     for (const Expr *Ref : C->varlists()) {
1512       if (!Ref->getType()->isScalarType())
1513         continue;
1514       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1515       if (!DRE)
1516         continue;
1517       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1518     }
1519   }
1520   CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1521       CGF, S, PrivateDecls);
1522 }
1523 
1524 static void emitCommonOMPParallelDirective(
1525     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1526     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1527     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1528   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1529   llvm::Function *OutlinedFn =
1530       CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1531           S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1532   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1533     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1534     llvm::Value *NumThreads =
1535         CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1536                            /*IgnoreResultAssign=*/true);
1537     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1538         CGF, NumThreads, NumThreadsClause->getBeginLoc());
1539   }
1540   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1541     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1542     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1543         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1544   }
1545   const Expr *IfCond = nullptr;
1546   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1547     if (C->getNameModifier() == OMPD_unknown ||
1548         C->getNameModifier() == OMPD_parallel) {
1549       IfCond = C->getCondition();
1550       break;
1551     }
1552   }
1553 
1554   OMPParallelScope Scope(CGF, S);
1555   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1556   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1557   // lower and upper bounds with the pragma 'for' chunking mechanism.
1558   // The following lambda takes care of appending the lower and upper bound
1559   // parameters when necessary
1560   CodeGenBoundParameters(CGF, S, CapturedVars);
1561   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1562   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1563                                               CapturedVars, IfCond);
1564 }
1565 
1566 static bool isAllocatableDecl(const VarDecl *VD) {
1567   const VarDecl *CVD = VD->getCanonicalDecl();
1568   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1569     return false;
1570   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1571   // Use the default allocation.
1572   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1573             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1574            !AA->getAllocator());
1575 }
1576 
/// Bound-parameters callback that appends nothing. All three arguments are
/// ignored; it is passed where a CodeGenBoundParametersTy callback is required
/// but no extra bound arguments are needed.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1580 
/// Emit an OpenMP allocator-based allocation for the local variable \p VD.
///
/// Returns Address::invalid() if \p VD is null or if isAllocatableDecl()
/// rejects its canonical declaration. Otherwise the allocation size is
/// computed (rounded up to the declaration's alignment), the allocator
/// expression from the OMPAllocateDeclAttr is evaluated and converted to a
/// void pointer, and alloc/free calls are created through the OpenMPIRBuilder.
/// The free call is registered as a normal-and-EH cleanup. The returned
/// Address is the allocation cast to a pointer to the variable's type, with
/// the declaration's alignment.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // The size is only known at run time, so the rounding is emitted as IR.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // The size is a compile-time constant: round it up directly.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.CreateOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  // The free call is created right away and handed to the cleanup below.
  llvm::CallInst *FreeCI =
      OMPBuilder.CreateOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  // Cast the untyped allocation to a pointer to the variable's memory type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, Align);
}
1630 
1631 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1632     CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1633     SourceLocation Loc) {
1634   CodeGenModule &CGM = CGF.CGM;
1635   if (CGM.getLangOpts().OpenMPUseTLS &&
1636       CGM.getContext().getTargetInfo().isTLSSupported())
1637     return VDAddr;
1638 
1639   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1640 
1641   llvm::Type *VarTy = VDAddr.getElementType();
1642   llvm::Value *Data =
1643       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1644   llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1645   std::string Suffix = getNameWithSeparators({"cache", ""});
1646   llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1647 
1648   llvm::CallInst *ThreadPrivateCacheCall =
1649       OMPBuilder.CreateCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1650 
1651   return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1652 }
1653 
1654 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1655     ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1656   SmallString<128> Buffer;
1657   llvm::raw_svector_ostream OS(Buffer);
1658   StringRef Sep = FirstSeparator;
1659   for (StringRef Part : Parts) {
1660     OS << Sep << Part;
1661     Sep = Separator;
1662   }
1663   return OS.str().str();
1664 }
/// Emit code for a '#pragma omp parallel' directive.
///
/// Two paths exist. When the OpenMPIRBuilder language option is enabled, the
/// if/num_threads/proc_bind clause values are evaluated and the region is
/// created through OpenMPIRBuilder::CreateParallel using body, privatization
/// and finalization callbacks. Otherwise the region is emitted through
/// emitCommonOMPParallelDirective() with a code-generation lambda that handles
/// the copyin, firstprivate, private and reduction clauses around the captured
/// statement.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    // Evaluate the num_threads expression, if the clause is present.
    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    // Use the proc_bind kind from the clause, or the default kind otherwise.
    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    // Body callback: emits the captured statement of the parallel region at
    // the insertion point supplied by the builder.
    auto BodyGenCB = [ParallelRegionBodyStmt,
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                            llvm::BasicBlock &ContinuationBB) {
      OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
                                                      ContinuationBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
                                             CodeGenIP, ContinuationBB);
    };

    // Install captured-statement info for the duration of the builder call.
    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    // Continue emitting code at the insertion point returned by the builder.
    Builder.restoreIP(
        OMPBuilder.CreateParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    // The RAII object obtained from disable() stays alive only for this
    // scope, which covers the region emission and the reduction post-updates.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    // A null condition means the post-updates are emitted unconditionally.
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
1757 
1758 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1759                      int MaxLevel, int Level = 0) {
1760   assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1761   const Stmt *SimplifiedS = S->IgnoreContainers();
1762   if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1763     PrettyStackTraceLoc CrashInfo(
1764         CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1765         "LLVM IR generation of compound statement ('{}')");
1766 
1767     // Keep track of the current cleanup stack depth, including debug scopes.
1768     CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1769     for (const Stmt *CurStmt : CS->body())
1770       emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1771     return;
1772   }
1773   if (SimplifiedS == NextLoop) {
1774     if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1775       S = For->getBody();
1776     } else {
1777       assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1778              "Expected canonical for loop or range-based for loop.");
1779       const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1780       CGF.EmitStmt(CXXFor->getLoopVarStmt());
1781       S = CXXFor->getBody();
1782     }
1783     if (Level + 1 < MaxLevel) {
1784       NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1785           S, /*TryImperfectlyNestedLoops=*/true);
1786       emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1787       return;
1788     }
1789   }
1790   CGF.EmitStmt(S);
1791 }
1792 
/// Emit one iteration of the body of the OpenMP loop directive \p D.
///
/// The counter updates (and, except for distribute directives, the linear
/// variable updates) are emitted first, followed by the checks from
/// finals_conditions(), optional setup for inscan reductions, and finally the
/// loop body itself via emitBody(). Inside the body, 'break' targets
/// \p LoopExit and 'continue' targets the "omp.body.continue" block emitted at
/// the end.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  // Set up the inscan reductions; Privatize() reports whether there are any.
  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in the
    // codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getCollapsedNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1860 
1861 void CodeGenFunction::EmitOMPInnerLoop(
1862     const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
1863     const Expr *IncExpr,
1864     const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
1865     const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
1866   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1867 
1868   // Start the loop with a block that tests the condition.
1869   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1870   EmitBlock(CondBlock);
1871   const SourceRange R = S.getSourceRange();
1872 
1873   // If attributes are attached, push to the basic block with them.
1874   const auto &OMPED = cast<OMPExecutableDirective>(S);
1875   const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
1876   const Stmt *SS = ICS->getCapturedStmt();
1877   const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
1878   if (AS)
1879     LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
1880                    AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
1881                    SourceLocToDebugLoc(R.getEnd()));
1882   else
1883     LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1884                    SourceLocToDebugLoc(R.getEnd()));
1885 
1886   // If there are any cleanups between here and the loop-exit scope,
1887   // create a block to stage a loop exit along.
1888   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
1889   if (RequiresCleanup)
1890     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1891 
1892   llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
1893 
1894   // Emit condition.
1895   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1896   if (ExitBlock != LoopExit.getBlock()) {
1897     EmitBlock(ExitBlock);
1898     EmitBranchThroughCleanup(LoopExit);
1899   }
1900 
1901   EmitBlock(LoopBody);
1902   incrementProfileCounter(&S);
1903 
1904   // Create a block for the increment.
1905   JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1906   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1907 
1908   BodyGen(*this);
1909 
1910   // Emit "IV = IV + 1" and a back-edge to the condition block.
1911   EmitBlock(Continue.getBlock());
1912   EmitIgnoredExpr(IncExpr);
1913   PostIncGen(*this);
1914   BreakContinueStack.pop_back();
1915   EmitBranch(CondBlock);
1916   LoopStack.pop();
1917   // Emit the fall-through block.
1918   EmitBlock(LoopExit.getBlock());
1919 }
1920 
1921 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1922   if (!HaveInsertPoint())
1923     return false;
1924   // Emit inits for the linear variables.
1925   bool HasLinears = false;
1926   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1927     for (const Expr *Init : C->inits()) {
1928       HasLinears = true;
1929       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1930       if (const auto *Ref =
1931               dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1932         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1933         const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1934         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1935                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1936                         VD->getInit()->getType(), VK_LValue,
1937                         VD->getInit()->getExprLoc());
1938         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1939                                                 VD->getType()),
1940                        /*capturedByInit=*/false);
1941         EmitAutoVarCleanups(Emission);
1942       } else {
1943         EmitVarDecl(*VD);
1944       }
1945     }
1946     // Emit the linear steps for the linear clauses.
1947     // If a step is not constant, it is pre-calculated before the loop.
1948     if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1949       if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1950         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1951         // Emit calculation of the linear step.
1952         EmitIgnoredExpr(CS);
1953       }
1954   }
1955   return HasLinears;
1956 }
1957 
1958 void CodeGenFunction::EmitOMPLinearClauseFinal(
1959     const OMPLoopDirective &D,
1960     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1961   if (!HaveInsertPoint())
1962     return;
1963   llvm::BasicBlock *DoneBB = nullptr;
1964   // Emit the final values of the linear variables.
1965   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1966     auto IC = C->varlist_begin();
1967     for (const Expr *F : C->finals()) {
1968       if (!DoneBB) {
1969         if (llvm::Value *Cond = CondGen(*this)) {
1970           // If the first post-update expression is found, emit conditional
1971           // block if it was requested.
1972           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
1973           DoneBB = createBasicBlock(".omp.linear.pu.done");
1974           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1975           EmitBlock(ThenBB);
1976         }
1977       }
1978       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1979       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1980                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1981                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1982       Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
1983       CodeGenFunction::OMPPrivateScope VarScope(*this);
1984       VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
1985       (void)VarScope.Privatize();
1986       EmitIgnoredExpr(F);
1987       ++IC;
1988     }
1989     if (const Expr *PostUpdate = C->getPostUpdateExpr())
1990       EmitIgnoredExpr(PostUpdate);
1991   }
1992   if (DoneBB)
1993     EmitBlock(DoneBB, /*IsFinished=*/true);
1994 }
1995 
1996 static void emitAlignedClause(CodeGenFunction &CGF,
1997                               const OMPExecutableDirective &D) {
1998   if (!CGF.HaveInsertPoint())
1999     return;
2000   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2001     llvm::APInt ClauseAlignment(64, 0);
2002     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2003       auto *AlignmentCI =
2004           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2005       ClauseAlignment = AlignmentCI->getValue();
2006     }
2007     for (const Expr *E : Clause->varlists()) {
2008       llvm::APInt Alignment(ClauseAlignment);
2009       if (Alignment == 0) {
2010         // OpenMP [2.8.1, Description]
2011         // If no optional parameter is specified, implementation-defined default
2012         // alignments for SIMD instructions on the target platforms are assumed.
2013         Alignment =
2014             CGF.getContext()
2015                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2016                     E->getType()->getPointeeType()))
2017                 .getQuantity();
2018       }
2019       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2020              "alignment is not power of 2");
2021       if (Alignment != 0) {
2022         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2023         CGF.emitAlignmentAssumption(
2024             PtrValue, E, /*No second loc needed*/ SourceLocation(),
2025             llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2026       }
2027     }
2028   }
2029 }
2030 
/// Register private storage for the loop counters of \p S in \p LoopScope.
///
/// For every counter, storage for the corresponding private counter variable
/// is allocated without initialization and registered as the private address
/// of the original counter. The private counter variable itself is mapped to
/// the original variable's address when that variable is a known local, is
/// captured, or has global storage; otherwise it is mapped to the new storage.
/// For 'ordered' clauses with a loop count, the extra loop counters beyond the
/// collapsed number that refer to an enclosing variable or capture are mapped
/// to fresh temporaries.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  // Iterates private_counters() in step with counters() below.
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    // Remove the private variable's entry from the local map; its address is
    // registered through LoopScope below instead.
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original counter is addressable here: map the private variable to
      // the address of the original one.
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress(*this);
      });
    } else {
      // Otherwise the private variable uses the storage allocated above.
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    // Start after the collapsed loops; their counters were handled above.
    for (unsigned I = S.getCollapsedNumber(),
                  E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
          return CreateMemTemp(DRE->getType(), VD->getName());
        });
      }
    }
  }
}
2080 
2081 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2082                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
2083                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2084   if (!CGF.HaveInsertPoint())
2085     return;
2086   {
2087     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2088     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2089     (void)PreCondScope.Privatize();
2090     // Get initial values of real counters.
2091     for (const Expr *I : S.inits()) {
2092       CGF.EmitIgnoredExpr(I);
2093     }
2094   }
2095   // Create temp loop control variables with their init values to support
2096   // non-rectangular loops.
2097   CodeGenFunction::OMPMapVars PreCondVars;
2098   for (const Expr * E: S.dependent_counters()) {
2099     if (!E)
2100       continue;
2101     assert(!E->getType().getNonReferenceType()->isRecordType() &&
2102            "dependent counter must not be an iterator.");
2103     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2104     Address CounterAddr =
2105         CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2106     (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2107   }
2108   (void)PreCondVars.apply(CGF);
2109   for (const Expr *E : S.dependent_inits()) {
2110     if (!E)
2111       continue;
2112     CGF.EmitIgnoredExpr(E);
2113   }
2114   // Check that loop is executed at least one time.
2115   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2116   PreCondVars.restore(CGF);
2117 }
2118 
2119 void CodeGenFunction::EmitOMPLinearClause(
2120     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2121   if (!HaveInsertPoint())
2122     return;
2123   llvm::DenseSet<const VarDecl *> SIMDLCVs;
2124   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2125     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2126     for (const Expr *C : LoopDirective->counters()) {
2127       SIMDLCVs.insert(
2128           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2129     }
2130   }
2131   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2132     auto CurPrivate = C->privates().begin();
2133     for (const Expr *E : C->varlists()) {
2134       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2135       const auto *PrivateVD =
2136           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2137       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2138         bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2139           // Emit private VarDecl with copy init.
2140           EmitVarDecl(*PrivateVD);
2141           return GetAddrOfLocalVar(PrivateVD);
2142         });
2143         assert(IsRegistered && "linear var already registered as private");
2144         // Silence the warning about unused variable.
2145         (void)IsRegistered;
2146       } else {
2147         EmitVarDecl(*PrivateVD);
2148       }
2149       ++CurPrivate;
2150     }
2151   }
2152 }
2153 
2154 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2155                                      const OMPExecutableDirective &D,
2156                                      bool IsMonotonic) {
2157   if (!CGF.HaveInsertPoint())
2158     return;
2159   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2160     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2161                                  /*ignoreResult=*/true);
2162     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2163     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2164     // In presence of finite 'safelen', it may be unsafe to mark all
2165     // the memory instructions parallel, because loop-carried
2166     // dependences of 'safelen' iterations are possible.
2167     if (!IsMonotonic)
2168       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2169   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2170     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2171                                  /*ignoreResult=*/true);
2172     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2173     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2174     // In presence of finite 'safelen', it may be unsafe to mark all
2175     // the memory instructions parallel, because loop-carried
2176     // dependences of 'safelen' iterations are possible.
2177     CGF.LoopStack.setParallel(/*Enable=*/false);
2178   }
2179 }
2180 
2181 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
2182                                       bool IsMonotonic) {
2183   // Walk clauses and process safelen/lastprivate.
2184   LoopStack.setParallel(!IsMonotonic);
2185   LoopStack.setVectorizeEnable();
2186   emitSimdlenSafelenClause(*this, D, IsMonotonic);
2187   if (const auto *C = D.getSingleClause<OMPOrderClause>())
2188     if (C->getKind() == OMPC_ORDER_concurrent)
2189       LoopStack.setParallel(/*Enable=*/true);
2190   if ((D.getDirectiveKind() == OMPD_simd ||
2191        (getLangOpts().OpenMPSimd &&
2192         isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2193       llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2194                    [](const OMPReductionClause *C) {
2195                      return C->getModifier() == OMPC_REDUCTION_inscan;
2196                    }))
2197     // Disable parallel access in case of prefix sum.
2198     LoopStack.setParallel(/*Enable=*/false);
2199 }
2200 
2201 void CodeGenFunction::EmitOMPSimdFinal(
2202     const OMPLoopDirective &D,
2203     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2204   if (!HaveInsertPoint())
2205     return;
2206   llvm::BasicBlock *DoneBB = nullptr;
2207   auto IC = D.counters().begin();
2208   auto IPC = D.private_counters().begin();
2209   for (const Expr *F : D.finals()) {
2210     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2211     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2212     const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2213     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2214         OrigVD->hasGlobalStorage() || CED) {
2215       if (!DoneBB) {
2216         if (llvm::Value *Cond = CondGen(*this)) {
2217           // If the first post-update expression is found, emit conditional
2218           // block if it was requested.
2219           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2220           DoneBB = createBasicBlock(".omp.final.done");
2221           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2222           EmitBlock(ThenBB);
2223         }
2224       }
2225       Address OrigAddr = Address::invalid();
2226       if (CED) {
2227         OrigAddr =
2228             EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2229       } else {
2230         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2231                         /*RefersToEnclosingVariableOrCapture=*/false,
2232                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2233         OrigAddr = EmitLValue(&DRE).getAddress(*this);
2234       }
2235       OMPPrivateScope VarScope(*this);
2236       VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2237       (void)VarScope.Privatize();
2238       EmitIgnoredExpr(F);
2239     }
2240     ++IC;
2241     ++IPC;
2242   }
2243   if (DoneBB)
2244     EmitBlock(DoneBB, /*IsFinished=*/true);
2245 }
2246 
2247 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2248                                          const OMPLoopDirective &S,
2249                                          CodeGenFunction::JumpDest LoopExit) {
2250   CGF.EmitOMPLoopBody(S, LoopExit);
2251   CGF.EmitStopPoint(&S);
2252 }
2253 
2254 /// Emit a helper variable and return corresponding lvalue.
2255 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2256                                const DeclRefExpr *Helper) {
2257   auto VDecl = cast<VarDecl>(Helper->getDecl());
2258   CGF.EmitVarDecl(*VDecl);
2259   return CGF.EmitLValue(Helper);
2260 }
2261 
2262 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2263                                const RegionCodeGenTy &SimdInitGen,
2264                                const RegionCodeGenTy &BodyCodeGen) {
2265   auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2266                                                     PrePostActionTy &) {
2267     CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2268     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2269     SimdInitGen(CGF);
2270 
2271     BodyCodeGen(CGF);
2272   };
2273   auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2274     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2275     CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2276 
2277     BodyCodeGen(CGF);
2278   };
2279   const Expr *IfCond = nullptr;
2280   if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2281     for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2282       if (CGF.getLangOpts().OpenMP >= 50 &&
2283           (C->getNameModifier() == OMPD_unknown ||
2284            C->getNameModifier() == OMPD_simd)) {
2285         IfCond = C->getCondition();
2286         break;
2287       }
2288     }
2289   }
2290   if (IfCond) {
2291     CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2292   } else {
2293     RegionCodeGenTy ThenRCG(ThenGen);
2294     ThenRCG(CGF);
2295   }
2296 }
2297 
2298 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2299                               PrePostActionTy &Action) {
2300   Action.Enter(CGF);
2301   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2302          "Expected simd directive");
2303   OMPLoopScope PreInitScope(CGF, S);
2304   // if (PreCond) {
2305   //   for (IV in 0..LastIteration) BODY;
2306   //   <Final counter/linear vars updates>;
2307   // }
2308   //
2309   if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2310       isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2311       isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2312     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2313     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2314   }
2315 
2316   // Emit: if (PreCond) - begin.
2317   // If the condition constant folds and can be elided, avoid emitting the
2318   // whole loop.
2319   bool CondConstant;
2320   llvm::BasicBlock *ContBlock = nullptr;
2321   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2322     if (!CondConstant)
2323       return;
2324   } else {
2325     llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2326     ContBlock = CGF.createBasicBlock("simd.if.end");
2327     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2328                 CGF.getProfileCount(&S));
2329     CGF.EmitBlock(ThenBlock);
2330     CGF.incrementProfileCounter(&S);
2331   }
2332 
2333   // Emit the loop iteration variable.
2334   const Expr *IVExpr = S.getIterationVariable();
2335   const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2336   CGF.EmitVarDecl(*IVDecl);
2337   CGF.EmitIgnoredExpr(S.getInit());
2338 
2339   // Emit the iterations count variable.
2340   // If it is not a variable, Sema decided to calculate iterations count on
2341   // each iteration (e.g., it is foldable into a constant).
2342   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2343     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2344     // Emit calculation of the iterations count.
2345     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2346   }
2347 
2348   emitAlignedClause(CGF, S);
2349   (void)CGF.EmitOMPLinearClauseInit(S);
2350   {
2351     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2352     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2353     CGF.EmitOMPLinearClause(S, LoopScope);
2354     CGF.EmitOMPPrivateClause(S, LoopScope);
2355     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2356     CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2357         CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2358     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2359     (void)LoopScope.Privatize();
2360     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2361       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2362 
2363     emitCommonSimdLoop(
2364         CGF, S,
2365         [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2366           CGF.EmitOMPSimdInit(S);
2367         },
2368         [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2369           CGF.EmitOMPInnerLoop(
2370               S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2371               [&S](CodeGenFunction &CGF) {
2372                 emitOMPLoopBodyWithStopPoint(CGF, S,
2373                                              CodeGenFunction::JumpDest());
2374               },
2375               [](CodeGenFunction &) {});
2376         });
2377     CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2378     // Emit final copy of the lastprivate variables at the end of loops.
2379     if (HasLastprivateClause)
2380       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2381     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2382     emitPostUpdateForReductionClause(CGF, S,
2383                                      [](CodeGenFunction &) { return nullptr; });
2384   }
2385   CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2386   // Emit: if (PreCond) - end.
2387   if (ContBlock) {
2388     CGF.EmitBranch(ContBlock);
2389     CGF.EmitBlock(ContBlock, true);
2390   }
2391 }
2392 
2393 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2394   ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2395   OMPFirstScanLoop = true;
2396   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2397     emitOMPSimdRegion(CGF, S, Action);
2398   };
2399   {
2400     auto LPCRegion =
2401         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2402     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2403     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2404   }
2405   // Check for outer lastprivate conditional update.
2406   checkForLastprivateConditionalUpdate(*this, S);
2407 }
2408 
2409 void CodeGenFunction::EmitOMPOuterLoop(
2410     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2411     CodeGenFunction::OMPPrivateScope &LoopScope,
2412     const CodeGenFunction::OMPLoopArguments &LoopArgs,
2413     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2414     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2415   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2416 
2417   const Expr *IVExpr = S.getIterationVariable();
2418   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2419   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2420 
2421   JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2422 
2423   // Start the loop with a block that tests the condition.
2424   llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2425   EmitBlock(CondBlock);
2426   const SourceRange R = S.getSourceRange();
2427   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2428                  SourceLocToDebugLoc(R.getEnd()));
2429 
2430   llvm::Value *BoolCondVal = nullptr;
2431   if (!DynamicOrOrdered) {
2432     // UB = min(UB, GlobalUB) or
2433     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2434     // 'distribute parallel for')
2435     EmitIgnoredExpr(LoopArgs.EUB);
2436     // IV = LB
2437     EmitIgnoredExpr(LoopArgs.Init);
2438     // IV < UB
2439     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2440   } else {
2441     BoolCondVal =
2442         RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2443                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2444   }
2445 
2446   // If there are any cleanups between here and the loop-exit scope,
2447   // create a block to stage a loop exit along.
2448   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2449   if (LoopScope.requiresCleanups())
2450     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2451 
2452   llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2453   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2454   if (ExitBlock != LoopExit.getBlock()) {
2455     EmitBlock(ExitBlock);
2456     EmitBranchThroughCleanup(LoopExit);
2457   }
2458   EmitBlock(LoopBody);
2459 
2460   // Emit "IV = LB" (in case of static schedule, we have already calculated new
2461   // LB for loop condition and emitted it above).
2462   if (DynamicOrOrdered)
2463     EmitIgnoredExpr(LoopArgs.Init);
2464 
2465   // Create a block for the increment.
2466   JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2467   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2468 
2469   emitCommonSimdLoop(
2470       *this, S,
2471       [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2472         // Generate !llvm.loop.parallel metadata for loads and stores for loops
2473         // with dynamic/guided scheduling and without ordered clause.
2474         if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2475           CGF.LoopStack.setParallel(!IsMonotonic);
2476           if (const auto *C = S.getSingleClause<OMPOrderClause>())
2477             if (C->getKind() == OMPC_ORDER_concurrent)
2478               CGF.LoopStack.setParallel(/*Enable=*/true);
2479         } else {
2480           CGF.EmitOMPSimdInit(S, IsMonotonic);
2481         }
2482       },
2483       [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2484        &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2485         SourceLocation Loc = S.getBeginLoc();
2486         // when 'distribute' is not combined with a 'for':
2487         // while (idx <= UB) { BODY; ++idx; }
2488         // when 'distribute' is combined with a 'for'
2489         // (e.g. 'distribute parallel for')
2490         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2491         CGF.EmitOMPInnerLoop(
2492             S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2493             [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2494               CodeGenLoop(CGF, S, LoopExit);
2495             },
2496             [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2497               CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2498             });
2499       });
2500 
2501   EmitBlock(Continue.getBlock());
2502   BreakContinueStack.pop_back();
2503   if (!DynamicOrOrdered) {
2504     // Emit "LB = LB + Stride", "UB = UB + Stride".
2505     EmitIgnoredExpr(LoopArgs.NextLB);
2506     EmitIgnoredExpr(LoopArgs.NextUB);
2507   }
2508 
2509   EmitBranch(CondBlock);
2510   LoopStack.pop();
2511   // Emit the fall-through block.
2512   EmitBlock(LoopExit.getBlock());
2513 
2514   // Tell the runtime we are done.
2515   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2516     if (!DynamicOrOrdered)
2517       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2518                                                      S.getDirectiveKind());
2519   };
2520   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2521 }
2522 
2523 void CodeGenFunction::EmitOMPForOuterLoop(
2524     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2525     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2526     const OMPLoopArguments &LoopArgs,
2527     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2528   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2529 
2530   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2531   const bool DynamicOrOrdered =
2532       Ordered || RT.isDynamic(ScheduleKind.Schedule);
2533 
2534   assert((Ordered ||
2535           !RT.isStaticNonchunked(ScheduleKind.Schedule,
2536                                  LoopArgs.Chunk != nullptr)) &&
2537          "static non-chunked schedule does not need outer loop");
2538 
2539   // Emit outer loop.
2540   //
2541   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2542   // When schedule(dynamic,chunk_size) is specified, the iterations are
2543   // distributed to threads in the team in chunks as the threads request them.
2544   // Each thread executes a chunk of iterations, then requests another chunk,
2545   // until no chunks remain to be distributed. Each chunk contains chunk_size
2546   // iterations, except for the last chunk to be distributed, which may have
2547   // fewer iterations. When no chunk_size is specified, it defaults to 1.
2548   //
2549   // When schedule(guided,chunk_size) is specified, the iterations are assigned
2550   // to threads in the team in chunks as the executing threads request them.
2551   // Each thread executes a chunk of iterations, then requests another chunk,
2552   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2553   // each chunk is proportional to the number of unassigned iterations divided
2554   // by the number of threads in the team, decreasing to 1. For a chunk_size
2555   // with value k (greater than 1), the size of each chunk is determined in the
2556   // same way, with the restriction that the chunks do not contain fewer than k
2557   // iterations (except for the last chunk to be assigned, which may have fewer
2558   // than k iterations).
2559   //
2560   // When schedule(auto) is specified, the decision regarding scheduling is
2561   // delegated to the compiler and/or runtime system. The programmer gives the
2562   // implementation the freedom to choose any possible mapping of iterations to
2563   // threads in the team.
2564   //
2565   // When schedule(runtime) is specified, the decision regarding scheduling is
2566   // deferred until run time, and the schedule and chunk size are taken from the
2567   // run-sched-var ICV. If the ICV is set to auto, the schedule is
2568   // implementation defined
2569   //
2570   // while(__kmpc_dispatch_next(&LB, &UB)) {
2571   //   idx = LB;
2572   //   while (idx <= UB) { BODY; ++idx;
2573   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2574   //   } // inner loop
2575   // }
2576   //
2577   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2578   // When schedule(static, chunk_size) is specified, iterations are divided into
2579   // chunks of size chunk_size, and the chunks are assigned to the threads in
2580   // the team in a round-robin fashion in the order of the thread number.
2581   //
2582   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2583   //   while (idx <= UB) { BODY; ++idx; } // inner loop
2584   //   LB = LB + ST;
2585   //   UB = UB + ST;
2586   // }
2587   //
2588 
2589   const Expr *IVExpr = S.getIterationVariable();
2590   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2591   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2592 
2593   if (DynamicOrOrdered) {
2594     const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2595         CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2596     llvm::Value *LBVal = DispatchBounds.first;
2597     llvm::Value *UBVal = DispatchBounds.second;
2598     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2599                                                              LoopArgs.Chunk};
2600     RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2601                            IVSigned, Ordered, DipatchRTInputValues);
2602   } else {
2603     CGOpenMPRuntime::StaticRTInput StaticInit(
2604         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2605         LoopArgs.ST, LoopArgs.Chunk);
2606     RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2607                          ScheduleKind, StaticInit);
2608   }
2609 
2610   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2611                                     const unsigned IVSize,
2612                                     const bool IVSigned) {
2613     if (Ordered) {
2614       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2615                                                             IVSigned);
2616     }
2617   };
2618 
2619   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2620                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2621   OuterLoopArgs.IncExpr = S.getInc();
2622   OuterLoopArgs.Init = S.getInit();
2623   OuterLoopArgs.Cond = S.getCond();
2624   OuterLoopArgs.NextLB = S.getNextLowerBound();
2625   OuterLoopArgs.NextUB = S.getNextUpperBound();
2626   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2627                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2628 }
2629 
2630 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2631                              const unsigned IVSize, const bool IVSigned) {}
2632 
2633 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2634     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2635     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2636     const CodeGenLoopTy &CodeGenLoopContent) {
2637 
2638   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2639 
2640   // Emit outer loop.
2641   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
2642   // dynamic
2643   //
2644 
2645   const Expr *IVExpr = S.getIterationVariable();
2646   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2647   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2648 
2649   CGOpenMPRuntime::StaticRTInput StaticInit(
2650       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2651       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2652   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2653 
2654   // for combined 'distribute' and 'for' the increment expression of distribute
2655   // is stored in DistInc. For 'distribute' alone, it is in Inc.
2656   Expr *IncExpr;
2657   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2658     IncExpr = S.getDistInc();
2659   else
2660     IncExpr = S.getInc();
2661 
2662   // this routine is shared by 'omp distribute parallel for' and
2663   // 'omp distribute': select the right EUB expression depending on the
2664   // directive
2665   OMPLoopArguments OuterLoopArgs;
2666   OuterLoopArgs.LB = LoopArgs.LB;
2667   OuterLoopArgs.UB = LoopArgs.UB;
2668   OuterLoopArgs.ST = LoopArgs.ST;
2669   OuterLoopArgs.IL = LoopArgs.IL;
2670   OuterLoopArgs.Chunk = LoopArgs.Chunk;
2671   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2672                           ? S.getCombinedEnsureUpperBound()
2673                           : S.getEnsureUpperBound();
2674   OuterLoopArgs.IncExpr = IncExpr;
2675   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2676                            ? S.getCombinedInit()
2677                            : S.getInit();
2678   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2679                            ? S.getCombinedCond()
2680                            : S.getCond();
2681   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2682                              ? S.getCombinedNextLowerBound()
2683                              : S.getNextLowerBound();
2684   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2685                              ? S.getCombinedNextUpperBound()
2686                              : S.getNextUpperBound();
2687 
2688   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2689                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2690                    emitEmptyOrdered);
2691 }
2692 
2693 static std::pair<LValue, LValue>
2694 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2695                                      const OMPExecutableDirective &S) {
2696   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2697   LValue LB =
2698       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2699   LValue UB =
2700       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2701 
2702   // When composing 'distribute' with 'for' (e.g. as in 'distribute
2703   // parallel for') we need to use the 'distribute'
2704   // chunk lower and upper bounds rather than the whole loop iteration
2705   // space. These are parameters to the outlined function for 'parallel'
2706   // and we copy the bounds of the previous schedule into the
2707   // the current ones.
2708   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2709   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2710   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2711       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2712   PrevLBVal = CGF.EmitScalarConversion(
2713       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2714       LS.getIterationVariable()->getType(),
2715       LS.getPrevLowerBoundVariable()->getExprLoc());
2716   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2717       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2718   PrevUBVal = CGF.EmitScalarConversion(
2719       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2720       LS.getIterationVariable()->getType(),
2721       LS.getPrevUpperBoundVariable()->getExprLoc());
2722 
2723   CGF.EmitStoreOfScalar(PrevLBVal, LB);
2724   CGF.EmitStoreOfScalar(PrevUBVal, UB);
2725 
2726   return {LB, UB};
2727 }
2728 
2729 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2730 /// we need to use the LB and UB expressions generated by the worksharing
2731 /// code generation support, whereas in non combined situations we would
2732 /// just emit 0 and the LastIteration expression
2733 /// This function is necessary due to the difference of the LB and UB
2734 /// types for the RT emission routines for 'for_static_init' and
2735 /// 'for_dispatch_init'
2736 static std::pair<llvm::Value *, llvm::Value *>
2737 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2738                                         const OMPExecutableDirective &S,
2739                                         Address LB, Address UB) {
2740   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2741   const Expr *IVExpr = LS.getIterationVariable();
2742   // when implementing a dynamic schedule for a 'for' combined with a
2743   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2744   // is not normalized as each team only executes its own assigned
2745   // distribute chunk
2746   QualType IteratorTy = IVExpr->getType();
2747   llvm::Value *LBVal =
2748       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2749   llvm::Value *UBVal =
2750       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2751   return {LBVal, UBVal};
2752 }
2753 
2754 static void emitDistributeParallelForDistributeInnerBoundParams(
2755     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2756     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2757   const auto &Dir = cast<OMPLoopDirective>(S);
2758   LValue LB =
2759       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2760   llvm::Value *LBCast =
2761       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
2762                                 CGF.SizeTy, /*isSigned=*/false);
2763   CapturedVars.push_back(LBCast);
2764   LValue UB =
2765       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2766 
2767   llvm::Value *UBCast =
2768       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
2769                                 CGF.SizeTy, /*isSigned=*/false);
2770   CapturedVars.push_back(UBCast);
2771 }
2772 
2773 static void
2774 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2775                                  const OMPLoopDirective &S,
2776                                  CodeGenFunction::JumpDest LoopExit) {
2777   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2778                                          PrePostActionTy &Action) {
2779     Action.Enter(CGF);
2780     bool HasCancel = false;
2781     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2782       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2783         HasCancel = D->hasCancel();
2784       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2785         HasCancel = D->hasCancel();
2786       else if (const auto *D =
2787                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2788         HasCancel = D->hasCancel();
2789     }
2790     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2791                                                      HasCancel);
2792     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2793                                emitDistributeParallelForInnerBounds,
2794                                emitDistributeParallelForDispatchBounds);
2795   };
2796 
2797   emitCommonOMPParallelDirective(
2798       CGF, S,
2799       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2800       CGInlinedWorksharingLoop,
2801       emitDistributeParallelForDistributeInnerBoundParams);
2802 }
2803 
2804 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2805     const OMPDistributeParallelForDirective &S) {
2806   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2807     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2808                               S.getDistInc());
2809   };
2810   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2811   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2812 }
2813 
2814 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2815     const OMPDistributeParallelForSimdDirective &S) {
2816   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2817     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2818                               S.getDistInc());
2819   };
2820   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2821   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2822 }
2823 
2824 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2825     const OMPDistributeSimdDirective &S) {
2826   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2827     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2828   };
2829   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2830   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2831 }
2832 
2833 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2834     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2835   // Emit SPMD target parallel for region as a standalone region.
2836   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2837     emitOMPSimdRegion(CGF, S, Action);
2838   };
2839   llvm::Function *Fn;
2840   llvm::Constant *Addr;
2841   // Emit target region as a standalone region.
2842   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2843       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2844   assert(Fn && Addr && "Target device function emission failed.");
2845 }
2846 
2847 void CodeGenFunction::EmitOMPTargetSimdDirective(
2848     const OMPTargetSimdDirective &S) {
2849   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2850     emitOMPSimdRegion(CGF, S, Action);
2851   };
2852   emitCommonOMPTargetDirective(*this, S, CodeGen);
2853 }
2854 
2855 namespace {
2856   struct ScheduleKindModifiersTy {
2857     OpenMPScheduleClauseKind Kind;
2858     OpenMPScheduleClauseModifier M1;
2859     OpenMPScheduleClauseModifier M2;
2860     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2861                             OpenMPScheduleClauseModifier M1,
2862                             OpenMPScheduleClauseModifier M2)
2863         : Kind(Kind), M1(M1), M2(M2) {}
2864   };
2865 } // namespace
2866 
/// Emits the worksharing loop for the loop directive \p S.
///
/// The iteration variable and, when it is a variable, the last-iteration
/// count are emitted first, followed by the loop precondition check. Under
/// the precondition the data-sharing clauses are initialized and privatized,
/// and the loop is emitted in one of two shapes:
///  - a single statically-initialized inner loop, when the schedule is static
///    without a chunk (or static with constant chunk 1 on a loop-bound-sharing
///    directive) and the loop is not 'ordered';
///  - otherwise via EmitOMPForOuterLoop.
/// Afterwards the simd-final, reduction, lastprivate and linear finalization
/// code is emitted.
///
/// \param S The loop directive being emitted.
/// \param EUB Expression stored in the OMPLoopArguments handed to
/// EmitOMPForOuterLoop; it is not read on the static path.
/// \param CodeGenLoopBounds Callback that produces the lower and upper bound
/// lvalues for the loop.
/// \param CGDispatchBounds Callback forwarded to EmitOMPForOuterLoop.
/// \returns false if the precondition constant-folds to false (the loop is
/// not emitted); otherwise the result of EmitOMPLastprivateClauseInit for
/// \p S.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Assigned inside the precondition scope below; the early return for a
  // constant-false precondition returns false directly instead.
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant true; in that case
    // no continuation block is needed at the end.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Cleanups registered from here on are forced explicitly after the linear
    // clause finalization at the end of the precondition scope.
    RunCleanupsScope DoacrossCleanupScope(*this);
    // An 'ordered' clause with a loop count triggers doacross initialization;
    // without a loop count it marks the loop itself as ordered.
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    // NOTE(review): EmittedFinals is not referenced anywhere else in this
    // function; it looks like a leftover - confirm and remove.
    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      // Set when the chunk expression evaluates to the compile-time
      // constant 1.
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        // The chunk value is converted to the iteration variable's type.
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // A static chunked schedule whose chunk is the constant 1, on a
      // loop-bound-sharing directive, is emitted on the static path below with
      // the chunk passed to the runtime and the combined 'parallel for in
      // distribute' condition and distribute increment driving the inner loop.
      bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
          /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool IsMonotonic =
          Ordered ||
          ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      // Static path: static schedule without a chunk, or the chunk-one case
      // above, and the loop is not 'ordered'.
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            // First callback: simd initialization for simd directives, or
            // marking the loop parallel for 'order(concurrent)'.
            [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S, IsMonotonic);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            // Second callback: static init call followed by the inner loop.
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      // The finalization steps below are all conditioned on the is-last-iter
      // flag (IL) being non-zero.
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    // Linear clause finalization happens after the private scope has ended.
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
3105 
3106 /// The following two functions generate expressions for the loop lower
3107 /// and upper bounds in case of static and dynamic (dispatch) schedule
3108 /// of the associated 'for' or 'distribute' loop.
3109 static std::pair<LValue, LValue>
3110 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3111   const auto &LS = cast<OMPLoopDirective>(S);
3112   LValue LB =
3113       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3114   LValue UB =
3115       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3116   return {LB, UB};
3117 }
3118 
3119 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3120 /// consider the lower and upper bound expressions generated by the
3121 /// worksharing loop support, but we use 0 and the iteration space size as
3122 /// constants
3123 static std::pair<llvm::Value *, llvm::Value *>
3124 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3125                           Address LB, Address UB) {
3126   const auto &LS = cast<OMPLoopDirective>(S);
3127   const Expr *IVExpr = LS.getIterationVariable();
3128   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3129   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3130   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3131   return {LBVal, UBVal};
3132 }
3133 
/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///   buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
///
/// \param CGF Function the code is emitted into.
/// \param S Loop directive carrying the inscan reduction clauses.
/// \param NumIteratorsGen Produces the number of iterations; the result is
/// cast (unsigned) to size_t and used as the buffer length.
/// \param FirstGen Emits the first loop (input phase); called with
/// CGF.OMPFirstScanLoop set to true.
/// \param SecondGen Emits the second loop (scan phase); called with
/// CGF.OMPFirstScanLoop set to false.
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  // Flatten the per-clause expression lists of all reduction clauses into
  // parallel vectors (same index == same reduction item).
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  {
    // Emit a buffer for each reduction variable.
    // ReductionCodeGen is required to emit correctly the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      // Bind the opaque size expression of the temp buffer's variable-length
      // array type to the iteration count while the buffer is emitted.
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // Emit prefix reduction:
  // for (int k = 0; k != ceil(log2(n)); ++k)
  //
  // The outer loop is emitted bottom-tested: control falls into LoopBB
  // unconditionally and the exit test compares k + 1 against ceil(log2(n))
  // after the body.
  // NOTE(review): because the test uses '!=' after the body, it relies on
  // ceil(log2(n)) >= 1. With n == 1 that value is 0 and the first test
  // (1 != 0) branches back instead of exiting; with n == 0 the NUW
  // subtraction below wraps. Confirm these cases cannot reach this code or
  // are handled elsewhere.
  llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
  llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
  llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
  // LogVal = (unsigned)ceil(log2((double)n)), computed with the llvm.log2 and
  // llvm.ceil intrinsics on double.
  llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
  llvm::Value *Arg =
      CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
  llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
  F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
  LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
  LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
  // Index of the last buffer element: n - 1.
  llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
      OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
  CGF.EmitBlock(LoopBB);
  // k: outer loop counter, starting at 0.
  auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
  // size pow2k = 1;
  auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
  Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
  Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
  // for (size i = n - 1; i >= pow2k; --i)
  //   tmp[i] op= tmp[i-pow2k];
  llvm::BasicBlock *InnerLoopBB =
      CGF.createBasicBlock("omp.inner.log.scan.body");
  llvm::BasicBlock *InnerExitBB =
      CGF.createBasicBlock("omp.inner.log.scan.exit");
  // Skip the inner loop entirely when n - 1 < pow2k.
  llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
  CGF.EmitBlock(InnerLoopBB);
  // i: inner loop index, starting at n - 1 and counting down.
  auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
  IVal->addIncoming(NMin1, LoopBB);
  {
    CodeGenFunction::OMPPrivateScope PrivScope(CGF);
    auto *ILHS = LHSs.begin();
    auto *IRHS = RHSs.begin();
    // For every reduction item, redirect its LHS variable to tmp[i] and its
    // RHS variable to tmp[i - pow2k], so that the reduction emitted below
    // combines those two buffer elements.
    for (const Expr *CopyArrayElem : CopyArrayElems) {
      const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      Address LHSAddr = Address::invalid();
      {
        // Bind the subscript's opaque index to i while taking the address.
        CodeGenFunction::OpaqueValueMapping IdxMapping(
            CGF,
            cast<OpaqueValueExpr>(
                cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
            RValue::get(IVal));
        LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
      }
      PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
      Address RHSAddr = Address::invalid();
      {
        // Bind the subscript's opaque index to i - pow2k.
        llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
        CodeGenFunction::OpaqueValueMapping IdxMapping(
            CGF,
            cast<OpaqueValueExpr>(
                cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
            RValue::get(OffsetIVal));
        RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
      }
      PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
      ++ILHS;
      ++IRHS;
    }
    PrivScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitReduction(
        CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
        {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
  }
  // --i; continue the inner loop while i >= pow2k.
  llvm::Value *NextIVal =
      CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
  IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
  CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
  CGF.EmitBlock(InnerExitBB);
  // ++k;
  llvm::Value *Next =
      CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
  Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
  // pow2k <<= 1;
  llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
  Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
  // Repeat the outer loop until k + 1 == ceil(log2(n)).
  llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
  CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
  auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
  CGF.EmitBlock(ExitBB);

  // Emit the second loop (scan phase).
  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
3309 
3310 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3311                                      const OMPLoopDirective &S,
3312                                      bool HasCancel) {
3313   bool HasLastprivates;
3314   if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3315                    [](const OMPReductionClause *C) {
3316                      return C->getModifier() == OMPC_REDUCTION_inscan;
3317                    })) {
3318     const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3319       CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3320       OMPLoopScope LoopScope(CGF, S);
3321       return CGF.EmitScalarExpr(S.getNumIterations());
3322     };
3323     const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3324       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3325           CGF, S.getDirectiveKind(), HasCancel);
3326       (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3327                                        emitForLoopBounds,
3328                                        emitDispatchForLoopBounds);
3329       // Emit an implicit barrier at the end.
3330       CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3331                                                  OMPD_for);
3332     };
3333     const auto &&SecondGen = [&S, HasCancel,
3334                               &HasLastprivates](CodeGenFunction &CGF) {
3335       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3336           CGF, S.getDirectiveKind(), HasCancel);
3337       HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3338                                                    emitForLoopBounds,
3339                                                    emitDispatchForLoopBounds);
3340     };
3341     emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3342   } else {
3343     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3344                                                      HasCancel);
3345     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3346                                                  emitForLoopBounds,
3347                                                  emitDispatchForLoopBounds);
3348   }
3349   return HasLastprivates;
3350 }
3351 
3352 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3353   bool HasLastprivates = false;
3354   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3355                                           PrePostActionTy &) {
3356     HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3357   };
3358   {
3359     auto LPCRegion =
3360         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3361     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3362     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3363                                                 S.hasCancel());
3364   }
3365 
3366   // Emit an implicit barrier at the end.
3367   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3368     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3369   // Check for outer lastprivate conditional update.
3370   checkForLastprivateConditionalUpdate(*this, S);
3371 }
3372 
3373 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3374   bool HasLastprivates = false;
3375   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3376                                           PrePostActionTy &) {
3377     HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3378   };
3379   {
3380     auto LPCRegion =
3381         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3382     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3383     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3384   }
3385 
3386   // Emit an implicit barrier at the end.
3387   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3388     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3389   // Check for outer lastprivate conditional update.
3390   checkForLastprivateConditionalUpdate(*this, S);
3391 }
3392 
3393 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3394                                 const Twine &Name,
3395                                 llvm::Value *Init = nullptr) {
3396   LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3397   if (Init)
3398     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3399   return LVal;
3400 }
3401 
3402 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3403   const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3404   const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3405   bool HasLastprivates = false;
3406   auto &&CodeGen = [&S, CapturedStmt, CS,
3407                     &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3408     const ASTContext &C = CGF.getContext();
3409     QualType KmpInt32Ty =
3410         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3411     // Emit helper vars inits.
3412     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3413                                   CGF.Builder.getInt32(0));
3414     llvm::ConstantInt *GlobalUBVal = CS != nullptr
3415                                          ? CGF.Builder.getInt32(CS->size() - 1)
3416                                          : CGF.Builder.getInt32(0);
3417     LValue UB =
3418         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3419     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3420                                   CGF.Builder.getInt32(1));
3421     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3422                                   CGF.Builder.getInt32(0));
3423     // Loop counter.
3424     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3425     OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3426     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3427     OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3428     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3429     // Generate condition for loop.
3430     BinaryOperator *Cond = BinaryOperator::Create(
3431         C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
3432         S.getBeginLoc(), FPOptionsOverride());
3433     // Increment for loop counter.
3434     UnaryOperator *Inc = UnaryOperator::Create(
3435         C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
3436         S.getBeginLoc(), true, FPOptionsOverride());
3437     auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3438       // Iterate through all sections and emit a switch construct:
3439       // switch (IV) {
3440       //   case 0:
3441       //     <SectionStmt[0]>;
3442       //     break;
3443       // ...
3444       //   case <NumSection> - 1:
3445       //     <SectionStmt[<NumSection> - 1]>;
3446       //     break;
3447       // }
3448       // .omp.sections.exit:
3449       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
3450       llvm::SwitchInst *SwitchStmt =
3451           CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
3452                                    ExitBB, CS == nullptr ? 1 : CS->size());
3453       if (CS) {
3454         unsigned CaseNumber = 0;
3455         for (const Stmt *SubStmt : CS->children()) {
3456           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
3457           CGF.EmitBlock(CaseBB);
3458           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
3459           CGF.EmitStmt(SubStmt);
3460           CGF.EmitBranch(ExitBB);
3461           ++CaseNumber;
3462         }
3463       } else {
3464         llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
3465         CGF.EmitBlock(CaseBB);
3466         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
3467         CGF.EmitStmt(CapturedStmt);
3468         CGF.EmitBranch(ExitBB);
3469       }
3470       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
3471     };
3472 
3473     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3474     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
3475       // Emit implicit barrier to synchronize threads and avoid data races on
3476       // initialization of firstprivate variables and post-update of lastprivate
3477       // variables.
3478       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3479           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3480           /*ForceSimpleCall=*/true);
3481     }
3482     CGF.EmitOMPPrivateClause(S, LoopScope);
3483     CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
3484     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3485     CGF.EmitOMPReductionClauseInit(S, LoopScope);
3486     (void)LoopScope.Privatize();
3487     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3488       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
3489 
3490     // Emit static non-chunked loop.
3491     OpenMPScheduleTy ScheduleKind;
3492     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3493     CGOpenMPRuntime::StaticRTInput StaticInit(
3494         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3495         LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3496     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3497         CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3498     // UB = min(UB, GlobalUB);
3499     llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3500     llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3501         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3502     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3503     // IV = LB;
3504     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3505     // while (idx <= UB) { BODY; ++idx; }
3506     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3507                          [](CodeGenFunction &) {});
3508     // Tell the runtime we are done.
3509     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3510       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3511                                                      S.getDirectiveKind());
3512     };
3513     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3514     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3515     // Emit post-update of the reduction variables if IsLastIter != 0.
3516     emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3517       return CGF.Builder.CreateIsNotNull(
3518           CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3519     });
3520 
3521     // Emit final copy of the lastprivate variables if IsLastIter != 0.
3522     if (HasLastprivates)
3523       CGF.EmitOMPLastprivateClauseFinal(
3524           S, /*NoFinals=*/false,
3525           CGF.Builder.CreateIsNotNull(
3526               CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3527   };
3528 
3529   bool HasCancel = false;
3530   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3531     HasCancel = OSD->hasCancel();
3532   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3533     HasCancel = OPSD->hasCancel();
3534   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3535   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3536                                               HasCancel);
3537   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
3538   // clause. Otherwise the barrier will be generated by the codegen for the
3539   // directive.
3540   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
3541     // Emit implicit barrier to synchronize threads and avoid data races on
3542     // initialization of firstprivate variables.
3543     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3544                                            OMPD_unknown);
3545   }
3546 }
3547 
3548 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3549   {
3550     auto LPCRegion =
3551         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3552     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3553     EmitSections(S);
3554   }
3555   // Emit an implicit barrier at the end.
3556   if (!S.getSingleClause<OMPNowaitClause>()) {
3557     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3558                                            OMPD_sections);
3559   }
3560   // Check for outer lastprivate conditional update.
3561   checkForLastprivateConditionalUpdate(*this, S);
3562 }
3563 
3564 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3565   LexicalScope Scope(*this, S.getSourceRange());
3566   EmitStopPoint(&S);
3567   EmitStmt(S.getAssociatedStmt());
3568 }
3569 
3570 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3571   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3572   llvm::SmallVector<const Expr *, 8> DestExprs;
3573   llvm::SmallVector<const Expr *, 8> SrcExprs;
3574   llvm::SmallVector<const Expr *, 8> AssignmentOps;
3575   // Check if there are any 'copyprivate' clauses associated with this
3576   // 'single' construct.
3577   // Build a list of copyprivate variables along with helper expressions
3578   // (<source>, <destination>, <destination>=<source> expressions)
3579   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
3580     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
3581     DestExprs.append(C->destination_exprs().begin(),
3582                      C->destination_exprs().end());
3583     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
3584     AssignmentOps.append(C->assignment_ops().begin(),
3585                          C->assignment_ops().end());
3586   }
3587   // Emit code for 'single' region along with 'copyprivate' clauses
3588   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3589     Action.Enter(CGF);
3590     OMPPrivateScope SingleScope(CGF);
3591     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
3592     CGF.EmitOMPPrivateClause(S, SingleScope);
3593     (void)SingleScope.Privatize();
3594     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3595   };
3596   {
3597     auto LPCRegion =
3598         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3599     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3600     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
3601                                             CopyprivateVars, DestExprs,
3602                                             SrcExprs, AssignmentOps);
3603   }
3604   // Emit an implicit barrier at the end (to avoid data race on firstprivate
3605   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
3606   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
3607     CGM.getOpenMPRuntime().emitBarrierCall(
3608         *this, S.getBeginLoc(),
3609         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
3610   }
3611   // Check for outer lastprivate conditional update.
3612   checkForLastprivateConditionalUpdate(*this, S);
3613 }
3614 
3615 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3616   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3617     Action.Enter(CGF);
3618     CGF.EmitStmt(S.getRawStmt());
3619   };
3620   CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3621 }
3622 
3623 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
3624   if (CGM.getLangOpts().OpenMPIRBuilder) {
3625     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3626     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3627 
3628     const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
3629 
3630     auto FiniCB = [this](InsertPointTy IP) {
3631       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3632     };
3633 
3634     auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
3635                                                   InsertPointTy CodeGenIP,
3636                                                   llvm::BasicBlock &FiniBB) {
3637       OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3638       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
3639                                              CodeGenIP, FiniBB);
3640     };
3641 
3642     LexicalScope Scope(*this, S.getSourceRange());
3643     EmitStopPoint(&S);
3644     Builder.restoreIP(OMPBuilder.CreateMaster(Builder, BodyGenCB, FiniCB));
3645 
3646     return;
3647   }
3648   LexicalScope Scope(*this, S.getSourceRange());
3649   EmitStopPoint(&S);
3650   emitMaster(*this, S);
3651 }
3652 
3653 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
3654   if (CGM.getLangOpts().OpenMPIRBuilder) {
3655     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3656     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3657 
3658     const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
3659     const Expr *Hint = nullptr;
3660     if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3661       Hint = HintClause->getHint();
3662 
3663     // TODO: This is slightly different from what's currently being done in
3664     // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
3665     // about typing is final.
3666     llvm::Value *HintInst = nullptr;
3667     if (Hint)
3668       HintInst =
3669           Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
3670 
3671     auto FiniCB = [this](InsertPointTy IP) {
3672       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3673     };
3674 
3675     auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
3676                                                     InsertPointTy CodeGenIP,
3677                                                     llvm::BasicBlock &FiniBB) {
3678       OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3679       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
3680                                              CodeGenIP, FiniBB);
3681     };
3682 
3683     LexicalScope Scope(*this, S.getSourceRange());
3684     EmitStopPoint(&S);
3685     Builder.restoreIP(OMPBuilder.CreateCritical(
3686         Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
3687         HintInst));
3688 
3689     return;
3690   }
3691 
3692   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3693     Action.Enter(CGF);
3694     CGF.EmitStmt(S.getAssociatedStmt());
3695   };
3696   const Expr *Hint = nullptr;
3697   if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3698     Hint = HintClause->getHint();
3699   LexicalScope Scope(*this, S.getSourceRange());
3700   EmitStopPoint(&S);
3701   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
3702                                             S.getDirectiveName().getAsString(),
3703                                             CodeGen, S.getBeginLoc(), Hint);
3704 }
3705 
3706 void CodeGenFunction::EmitOMPParallelForDirective(
3707     const OMPParallelForDirective &S) {
3708   // Emit directive as a combined directive that consists of two implicit
3709   // directives: 'parallel' with 'for' directive.
3710   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3711     Action.Enter(CGF);
3712     (void)emitWorksharingDirective(CGF, S, S.hasCancel());
3713   };
3714   {
3715     auto LPCRegion =
3716         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3717     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
3718                                    emitEmptyBoundParameters);
3719   }
3720   // Check for outer lastprivate conditional update.
3721   checkForLastprivateConditionalUpdate(*this, S);
3722 }
3723 
3724 void CodeGenFunction::EmitOMPParallelForSimdDirective(
3725     const OMPParallelForSimdDirective &S) {
3726   // Emit directive as a combined directive that consists of two implicit
3727   // directives: 'parallel' with 'for' directive.
3728   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3729     Action.Enter(CGF);
3730     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3731   };
3732   {
3733     auto LPCRegion =
3734         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3735     emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
3736                                    emitEmptyBoundParameters);
3737   }
3738   // Check for outer lastprivate conditional update.
3739   checkForLastprivateConditionalUpdate(*this, S);
3740 }
3741 
3742 void CodeGenFunction::EmitOMPParallelMasterDirective(
3743     const OMPParallelMasterDirective &S) {
3744   // Emit directive as a combined directive that consists of two implicit
3745   // directives: 'parallel' with 'master' directive.
3746   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3747     Action.Enter(CGF);
3748     OMPPrivateScope PrivateScope(CGF);
3749     bool Copyins = CGF.EmitOMPCopyinClause(S);
3750     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3751     if (Copyins) {
3752       // Emit implicit barrier to synchronize threads and avoid data races on
3753       // propagation master's thread values of threadprivate variables to local
3754       // instances of that variables of all other implicit threads.
3755       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3756           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3757           /*ForceSimpleCall=*/true);
3758     }
3759     CGF.EmitOMPPrivateClause(S, PrivateScope);
3760     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3761     (void)PrivateScope.Privatize();
3762     emitMaster(CGF, S);
3763     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3764   };
3765   {
3766     auto LPCRegion =
3767         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3768     emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
3769                                    emitEmptyBoundParameters);
3770     emitPostUpdateForReductionClause(*this, S,
3771                                      [](CodeGenFunction &) { return nullptr; });
3772   }
3773   // Check for outer lastprivate conditional update.
3774   checkForLastprivateConditionalUpdate(*this, S);
3775 }
3776 
3777 void CodeGenFunction::EmitOMPParallelSectionsDirective(
3778     const OMPParallelSectionsDirective &S) {
3779   // Emit directive as a combined directive that consists of two implicit
3780   // directives: 'parallel' with 'sections' directive.
3781   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3782     Action.Enter(CGF);
3783     CGF.EmitSections(S);
3784   };
3785   {
3786     auto LPCRegion =
3787         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3788     emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
3789                                    emitEmptyBoundParameters);
3790   }
3791   // Check for outer lastprivate conditional update.
3792   checkForLastprivateConditionalUpdate(*this, S);
3793 }
3794 
3795 namespace {
3796 /// Get the list of variables declared in the context of the untied tasks.
3797 class CheckVarsEscapingUntiedTaskDeclContext final
3798     : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
3799   llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
3800 
3801 public:
3802   explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
3803   virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
3804   void VisitDeclStmt(const DeclStmt *S) {
3805     if (!S)
3806       return;
3807     // Need to privatize only local vars, static locals can be processed as is.
3808     for (const Decl *D : S->decls()) {
3809       if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
3810         if (VD->hasLocalStorage())
3811           PrivateDecls.push_back(VD);
3812     }
3813   }
3814   void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
3815   void VisitCapturedStmt(const CapturedStmt *) { return; }
3816   void VisitLambdaExpr(const LambdaExpr *) { return; }
3817   void VisitBlockExpr(const BlockExpr *) { return; }
3818   void VisitStmt(const Stmt *S) {
3819     if (!S)
3820       return;
3821     for (const Stmt *Child : S->children())
3822       if (Child)
3823         Visit(Child);
3824   }
3825 
3826   /// Swaps list of vars with the provided one.
3827   ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
3828 };
3829 } // anonymous namespace
3830 
3831 void CodeGenFunction::EmitOMPTaskBasedDirective(
3832     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
3833     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
3834     OMPTaskDataTy &Data) {
3835   // Emit outlined function for task construct.
3836   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
3837   auto I = CS->getCapturedDecl()->param_begin();
3838   auto PartId = std::next(I);
3839   auto TaskT = std::next(I, 4);
3840   // Check if the task is final
3841   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
3842     // If the condition constant folds and can be elided, try to avoid emitting
3843     // the condition and the dead arm of the if/else.
3844     const Expr *Cond = Clause->getCondition();
3845     bool CondConstant;
3846     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
3847       Data.Final.setInt(CondConstant);
3848     else
3849       Data.Final.setPointer(EvaluateExprAsBool(Cond));
3850   } else {
3851     // By default the task is not final.
3852     Data.Final.setInt(/*IntVal=*/false);
3853   }
3854   // Check if the task has 'priority' clause.
3855   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
3856     const Expr *Prio = Clause->getPriority();
3857     Data.Priority.setInt(/*IntVal=*/true);
3858     Data.Priority.setPointer(EmitScalarConversion(
3859         EmitScalarExpr(Prio), Prio->getType(),
3860         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
3861         Prio->getExprLoc()));
3862   }
3863   // The first function argument for tasks is a thread id, the second one is a
3864   // part id (0 for tied tasks, >=0 for untied task).
3865   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
3866   // Get list of private variables.
3867   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
3868     auto IRef = C->varlist_begin();
3869     for (const Expr *IInit : C->private_copies()) {
3870       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3871       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3872         Data.PrivateVars.push_back(*IRef);
3873         Data.PrivateCopies.push_back(IInit);
3874       }
3875       ++IRef;
3876     }
3877   }
3878   EmittedAsPrivate.clear();
3879   // Get list of firstprivate variables.
3880   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3881     auto IRef = C->varlist_begin();
3882     auto IElemInitRef = C->inits().begin();
3883     for (const Expr *IInit : C->private_copies()) {
3884       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3885       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3886         Data.FirstprivateVars.push_back(*IRef);
3887         Data.FirstprivateCopies.push_back(IInit);
3888         Data.FirstprivateInits.push_back(*IElemInitRef);
3889       }
3890       ++IRef;
3891       ++IElemInitRef;
3892     }
3893   }
3894   // Get list of lastprivate variables (for taskloops).
3895   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
3896   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
3897     auto IRef = C->varlist_begin();
3898     auto ID = C->destination_exprs().begin();
3899     for (const Expr *IInit : C->private_copies()) {
3900       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3901       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3902         Data.LastprivateVars.push_back(*IRef);
3903         Data.LastprivateCopies.push_back(IInit);
3904       }
3905       LastprivateDstsOrigs.insert(
3906           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
3907            cast<DeclRefExpr>(*IRef)});
3908       ++IRef;
3909       ++ID;
3910     }
3911   }
3912   SmallVector<const Expr *, 4> LHSs;
3913   SmallVector<const Expr *, 4> RHSs;
3914   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3915     Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
3916     Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
3917     Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
3918     Data.ReductionOps.append(C->reduction_ops().begin(),
3919                              C->reduction_ops().end());
3920     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3921     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3922   }
3923   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
3924       *this, S.getBeginLoc(), LHSs, RHSs, Data);
3925   // Build list of dependences.
3926   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
3927     OMPTaskDataTy::DependData &DD =
3928         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
3929     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
3930   }
3931   // Get list of local vars for untied tasks.
3932   if (!Data.Tied) {
3933     CheckVarsEscapingUntiedTaskDeclContext Checker;
3934     Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
3935     Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
3936                               Checker.getPrivateDecls().end());
3937   }
3938   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
3939                     CapturedRegion](CodeGenFunction &CGF,
3940                                     PrePostActionTy &Action) {
3941     llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, std::pair<Address, Address>>
3942         UntiedLocalVars;
3943     // Set proper addresses for generated private copies.
3944     OMPPrivateScope Scope(CGF);
3945     llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
3946     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
3947         !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
3948       llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
3949           CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
3950       enum { PrivatesParam = 2, CopyFnParam = 3 };
3951       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
3952           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
3953       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
3954           CS->getCapturedDecl()->getParam(PrivatesParam)));
3955       // Map privates.
3956       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3957       llvm::SmallVector<llvm::Value *, 16> CallArgs;
3958       CallArgs.push_back(PrivatesPtr);
3959       for (const Expr *E : Data.PrivateVars) {
3960         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3961         Address PrivatePtr = CGF.CreateMemTemp(
3962             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
3963         PrivatePtrs.emplace_back(VD, PrivatePtr);
3964         CallArgs.push_back(PrivatePtr.getPointer());
3965       }
3966       for (const Expr *E : Data.FirstprivateVars) {
3967         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3968         Address PrivatePtr =
3969             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3970                               ".firstpriv.ptr.addr");
3971         PrivatePtrs.emplace_back(VD, PrivatePtr);
3972         FirstprivatePtrs.emplace_back(VD, PrivatePtr);
3973         CallArgs.push_back(PrivatePtr.getPointer());
3974       }
3975       for (const Expr *E : Data.LastprivateVars) {
3976         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3977         Address PrivatePtr =
3978             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3979                               ".lastpriv.ptr.addr");
3980         PrivatePtrs.emplace_back(VD, PrivatePtr);
3981         CallArgs.push_back(PrivatePtr.getPointer());
3982       }
3983       for (const VarDecl *VD : Data.PrivateLocals) {
3984         QualType Ty = VD->getType().getNonReferenceType();
3985         if (VD->getType()->isLValueReferenceType())
3986           Ty = CGF.getContext().getPointerType(Ty);
3987         if (isAllocatableDecl(VD))
3988           Ty = CGF.getContext().getPointerType(Ty);
3989         Address PrivatePtr = CGF.CreateMemTemp(
3990             CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
3991         UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid());
3992         CallArgs.push_back(PrivatePtr.getPointer());
3993       }
3994       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
3995           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
3996       for (const auto &Pair : LastprivateDstsOrigs) {
3997         const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
3998         DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
3999                         /*RefersToEnclosingVariableOrCapture=*/
4000                             CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4001                         Pair.second->getType(), VK_LValue,
4002                         Pair.second->getExprLoc());
4003         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
4004           return CGF.EmitLValue(&DRE).getAddress(CGF);
4005         });
4006       }
4007       for (const auto &Pair : PrivatePtrs) {
4008         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4009                             CGF.getContext().getDeclAlign(Pair.first));
4010         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4011       }
4012       // Adjust mapping for internal locals by mapping actual memory instead of
4013       // a pointer to this memory.
4014       for (auto &Pair : UntiedLocalVars) {
4015         if (isAllocatableDecl(Pair.first)) {
4016           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4017           Address Replacement(Ptr, CGF.getPointerAlign());
4018           Pair.getSecond().first = Replacement;
4019           Ptr = CGF.Builder.CreateLoad(Replacement);
4020           Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4021           Pair.getSecond().second = Replacement;
4022         } else {
4023           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4024           Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4025           Pair.getSecond().first = Replacement;
4026         }
4027       }
4028     }
4029     if (Data.Reductions) {
4030       OMPPrivateScope FirstprivateScope(CGF);
4031       for (const auto &Pair : FirstprivatePtrs) {
4032         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4033                             CGF.getContext().getDeclAlign(Pair.first));
4034         FirstprivateScope.addPrivate(Pair.first,
4035                                      [Replacement]() { return Replacement; });
4036       }
4037       (void)FirstprivateScope.Privatize();
4038       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4039       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4040                              Data.ReductionCopies, Data.ReductionOps);
4041       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4042           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4043       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4044         RedCG.emitSharedOrigLValue(CGF, Cnt);
4045         RedCG.emitAggregateType(CGF, Cnt);
4046         // FIXME: This must removed once the runtime library is fixed.
4047         // Emit required threadprivate variables for
4048         // initializer/combiner/finalizer.
4049         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4050                                                            RedCG, Cnt);
4051         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4052             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4053         Replacement =
4054             Address(CGF.EmitScalarConversion(
4055                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4056                         CGF.getContext().getPointerType(
4057                             Data.ReductionCopies[Cnt]->getType()),
4058                         Data.ReductionCopies[Cnt]->getExprLoc()),
4059                     Replacement.getAlignment());
4060         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4061         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
4062                          [Replacement]() { return Replacement; });
4063       }
4064     }
4065     // Privatize all private variables except for in_reduction items.
4066     (void)Scope.Privatize();
4067     SmallVector<const Expr *, 4> InRedVars;
4068     SmallVector<const Expr *, 4> InRedPrivs;
4069     SmallVector<const Expr *, 4> InRedOps;
4070     SmallVector<const Expr *, 4> TaskgroupDescriptors;
4071     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4072       auto IPriv = C->privates().begin();
4073       auto IRed = C->reduction_ops().begin();
4074       auto ITD = C->taskgroup_descriptors().begin();
4075       for (const Expr *Ref : C->varlists()) {
4076         InRedVars.emplace_back(Ref);
4077         InRedPrivs.emplace_back(*IPriv);
4078         InRedOps.emplace_back(*IRed);
4079         TaskgroupDescriptors.emplace_back(*ITD);
4080         std::advance(IPriv, 1);
4081         std::advance(IRed, 1);
4082         std::advance(ITD, 1);
4083       }
4084     }
4085     // Privatize in_reduction items here, because taskgroup descriptors must be
4086     // privatized earlier.
4087     OMPPrivateScope InRedScope(CGF);
4088     if (!InRedVars.empty()) {
4089       ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4090       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4091         RedCG.emitSharedOrigLValue(CGF, Cnt);
4092         RedCG.emitAggregateType(CGF, Cnt);
4093         // The taskgroup descriptor variable is always implicit firstprivate and
4094         // privatized already during processing of the firstprivates.
4095         // FIXME: This must removed once the runtime library is fixed.
4096         // Emit required threadprivate variables for
4097         // initializer/combiner/finalizer.
4098         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4099                                                            RedCG, Cnt);
4100         llvm::Value *ReductionsPtr;
4101         if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4102           ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4103                                                TRExpr->getExprLoc());
4104         } else {
4105           ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4106         }
4107         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4108             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4109         Replacement = Address(
4110             CGF.EmitScalarConversion(
4111                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4112                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4113                 InRedPrivs[Cnt]->getExprLoc()),
4114             Replacement.getAlignment());
4115         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4116         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
4117                               [Replacement]() { return Replacement; });
4118       }
4119     }
4120     (void)InRedScope.Privatize();
4121 
4122     CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4123                                                              UntiedLocalVars);
4124     Action.Enter(CGF);
4125     BodyGen(CGF);
4126   };
4127   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4128       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4129       Data.NumberOfParts);
4130   OMPLexicalScope Scope(*this, S, llvm::None,
4131                         !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4132                             !isOpenMPSimdDirective(S.getDirectiveKind()));
4133   TaskGen(*this, OutlinedFn, Data);
4134 }
4135 
4136 static ImplicitParamDecl *
4137 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4138                                   QualType Ty, CapturedDecl *CD,
4139                                   SourceLocation Loc) {
4140   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4141                                            ImplicitParamDecl::Other);
4142   auto *OrigRef = DeclRefExpr::Create(
4143       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4144       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4145   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4146                                               ImplicitParamDecl::Other);
4147   auto *PrivateRef = DeclRefExpr::Create(
4148       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4149       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4150   QualType ElemType = C.getBaseElementType(Ty);
4151   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4152                                            ImplicitParamDecl::Other);
4153   auto *InitRef = DeclRefExpr::Create(
4154       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4155       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4156   PrivateVD->setInitStyle(VarDecl::CInit);
4157   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4158                                               InitRef, /*BasePath=*/nullptr,
4159                                               VK_RValue, FPOptionsOverride()));
4160   Data.FirstprivateVars.emplace_back(OrigRef);
4161   Data.FirstprivateCopies.emplace_back(PrivateRef);
4162   Data.FirstprivateInits.emplace_back(InitRef);
4163   return OrigVD;
4164 }
4165 
4166 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4167     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4168     OMPTargetDataInfo &InputInfo) {
4169   // Emit outlined function for task construct.
4170   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4171   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4172   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4173   auto I = CS->getCapturedDecl()->param_begin();
4174   auto PartId = std::next(I);
4175   auto TaskT = std::next(I, 4);
4176   OMPTaskDataTy Data;
4177   // The task is not final.
4178   Data.Final.setInt(/*IntVal=*/false);
4179   // Get list of firstprivate variables.
4180   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4181     auto IRef = C->varlist_begin();
4182     auto IElemInitRef = C->inits().begin();
4183     for (auto *IInit : C->private_copies()) {
4184       Data.FirstprivateVars.push_back(*IRef);
4185       Data.FirstprivateCopies.push_back(IInit);
4186       Data.FirstprivateInits.push_back(*IElemInitRef);
4187       ++IRef;
4188       ++IElemInitRef;
4189     }
4190   }
4191   OMPPrivateScope TargetScope(*this);
4192   VarDecl *BPVD = nullptr;
4193   VarDecl *PVD = nullptr;
4194   VarDecl *SVD = nullptr;
4195   VarDecl *MVD = nullptr;
4196   if (InputInfo.NumberOfTargetItems > 0) {
4197     auto *CD = CapturedDecl::Create(
4198         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4199     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4200     QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
4201         getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4202         /*IndexTypeQuals=*/0);
4203     BPVD = createImplicitFirstprivateForType(
4204         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4205     PVD = createImplicitFirstprivateForType(
4206         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4207     QualType SizesType = getContext().getConstantArrayType(
4208         getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4209         ArrSize, nullptr, ArrayType::Normal,
4210         /*IndexTypeQuals=*/0);
4211     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4212                                             S.getBeginLoc());
4213     MVD = createImplicitFirstprivateForType(
4214         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4215     TargetScope.addPrivate(
4216         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
4217     TargetScope.addPrivate(PVD,
4218                            [&InputInfo]() { return InputInfo.PointersArray; });
4219     TargetScope.addPrivate(SVD,
4220                            [&InputInfo]() { return InputInfo.SizesArray; });
4221     TargetScope.addPrivate(MVD,
4222                            [&InputInfo]() { return InputInfo.MappersArray; });
4223   }
4224   (void)TargetScope.Privatize();
4225   // Build list of dependences.
4226   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4227     OMPTaskDataTy::DependData &DD =
4228         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4229     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4230   }
4231   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
4232                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
4233     // Set proper addresses for generated private copies.
4234     OMPPrivateScope Scope(CGF);
4235     if (!Data.FirstprivateVars.empty()) {
4236       llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
4237           CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
4238       enum { PrivatesParam = 2, CopyFnParam = 3 };
4239       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4240           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4241       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4242           CS->getCapturedDecl()->getParam(PrivatesParam)));
4243       // Map privates.
4244       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4245       llvm::SmallVector<llvm::Value *, 16> CallArgs;
4246       CallArgs.push_back(PrivatesPtr);
4247       for (const Expr *E : Data.FirstprivateVars) {
4248         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4249         Address PrivatePtr =
4250             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4251                               ".firstpriv.ptr.addr");
4252         PrivatePtrs.emplace_back(VD, PrivatePtr);
4253         CallArgs.push_back(PrivatePtr.getPointer());
4254       }
4255       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4256           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4257       for (const auto &Pair : PrivatePtrs) {
4258         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4259                             CGF.getContext().getDeclAlign(Pair.first));
4260         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4261       }
4262     }
4263     // Privatize all private variables except for in_reduction items.
4264     (void)Scope.Privatize();
4265     if (InputInfo.NumberOfTargetItems > 0) {
4266       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
4267           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
4268       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
4269           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
4270       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
4271           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4272       InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
4273           CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
4274     }
4275 
4276     Action.Enter(CGF);
4277     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
4278     BodyGen(CGF);
4279   };
4280   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4281       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
4282       Data.NumberOfParts);
4283   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
4284   IntegerLiteral IfCond(getContext(), TrueOrFalse,
4285                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
4286                         SourceLocation());
4287 
4288   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
4289                                       SharedsTy, CapturedStruct, &IfCond, Data);
4290 }
4291 
4292 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4293   // Emit outlined function for task construct.
4294   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4295   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4296   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4297   const Expr *IfCond = nullptr;
4298   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4299     if (C->getNameModifier() == OMPD_unknown ||
4300         C->getNameModifier() == OMPD_task) {
4301       IfCond = C->getCondition();
4302       break;
4303     }
4304   }
4305 
4306   OMPTaskDataTy Data;
4307   // Check if we should emit tied or untied task.
4308   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4309   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4310     CGF.EmitStmt(CS->getCapturedStmt());
4311   };
4312   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4313                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4314                             const OMPTaskDataTy &Data) {
4315     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
4316                                             SharedsTy, CapturedStruct, IfCond,
4317                                             Data);
4318   };
4319   auto LPCRegion =
4320       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4321   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
4322 }
4323 
4324 void CodeGenFunction::EmitOMPTaskyieldDirective(
4325     const OMPTaskyieldDirective &S) {
4326   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
4327 }
4328 
4329 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
4330   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
4331 }
4332 
4333 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
4334   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
4335 }
4336 
4337 void CodeGenFunction::EmitOMPTaskgroupDirective(
4338     const OMPTaskgroupDirective &S) {
4339   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4340     Action.Enter(CGF);
4341     if (const Expr *E = S.getReductionRef()) {
4342       SmallVector<const Expr *, 4> LHSs;
4343       SmallVector<const Expr *, 4> RHSs;
4344       OMPTaskDataTy Data;
4345       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
4346         Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4347         Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4348         Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4349         Data.ReductionOps.append(C->reduction_ops().begin(),
4350                                  C->reduction_ops().end());
4351         LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4352         RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4353       }
4354       llvm::Value *ReductionDesc =
4355           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
4356                                                            LHSs, RHSs, Data);
4357       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4358       CGF.EmitVarDecl(*VD);
4359       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
4360                             /*Volatile=*/false, E->getType());
4361     }
4362     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4363   };
4364   OMPLexicalScope Scope(*this, S, OMPD_unknown);
4365   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
4366 }
4367 
4368 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
4369   llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
4370                                 ? llvm::AtomicOrdering::NotAtomic
4371                                 : llvm::AtomicOrdering::AcquireRelease;
4372   CGM.getOpenMPRuntime().emitFlush(
4373       *this,
4374       [&S]() -> ArrayRef<const Expr *> {
4375         if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
4376           return llvm::makeArrayRef(FlushClause->varlist_begin(),
4377                                     FlushClause->varlist_end());
4378         return llvm::None;
4379       }(),
4380       S.getBeginLoc(), AO);
4381 }
4382 
4383 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4384   const auto *DO = S.getSingleClause<OMPDepobjClause>();
4385   LValue DOLVal = EmitLValue(DO->getDepobj());
4386   if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
4387     OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
4388                                            DC->getModifier());
4389     Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
4390     Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
4391         *this, Dependencies, DC->getBeginLoc());
4392     EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
4393     return;
4394   }
4395   if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
4396     CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
4397     return;
4398   }
4399   if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
4400     CGM.getOpenMPRuntime().emitUpdateClause(
4401         *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
4402     return;
4403   }
4404 }
4405 
4406 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4407   if (!OMPParentLoopDirectiveForScan)
4408     return;
4409   const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4410   bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4411   SmallVector<const Expr *, 4> Shareds;
4412   SmallVector<const Expr *, 4> Privates;
4413   SmallVector<const Expr *, 4> LHSs;
4414   SmallVector<const Expr *, 4> RHSs;
4415   SmallVector<const Expr *, 4> ReductionOps;
4416   SmallVector<const Expr *, 4> CopyOps;
4417   SmallVector<const Expr *, 4> CopyArrayTemps;
4418   SmallVector<const Expr *, 4> CopyArrayElems;
4419   for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
4420     if (C->getModifier() != OMPC_REDUCTION_inscan)
4421       continue;
4422     Shareds.append(C->varlist_begin(), C->varlist_end());
4423     Privates.append(C->privates().begin(), C->privates().end());
4424     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4425     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4426     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4427     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
4428     CopyArrayTemps.append(C->copy_array_temps().begin(),
4429                           C->copy_array_temps().end());
4430     CopyArrayElems.append(C->copy_array_elems().begin(),
4431                           C->copy_array_elems().end());
4432   }
4433   if (ParentDir.getDirectiveKind() == OMPD_simd ||
4434       (getLangOpts().OpenMPSimd &&
4435        isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
4436     // For simd directive and simd-based directives in simd only mode, use the
4437     // following codegen:
4438     // int x = 0;
4439     // #pragma omp simd reduction(inscan, +: x)
4440     // for (..) {
4441     //   <first part>
4442     //   #pragma omp scan inclusive(x)
4443     //   <second part>
4444     //  }
4445     // is transformed to:
4446     // int x = 0;
4447     // for (..) {
4448     //   int x_priv = 0;
4449     //   <first part>
4450     //   x = x_priv + x;
4451     //   x_priv = x;
4452     //   <second part>
4453     // }
4454     // and
4455     // int x = 0;
4456     // #pragma omp simd reduction(inscan, +: x)
4457     // for (..) {
4458     //   <first part>
4459     //   #pragma omp scan exclusive(x)
4460     //   <second part>
4461     // }
4462     // to
4463     // int x = 0;
4464     // for (..) {
4465     //   int x_priv = 0;
4466     //   <second part>
4467     //   int temp = x;
4468     //   x = x_priv + x;
4469     //   x_priv = temp;
4470     //   <first part>
4471     // }
4472     llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
4473     EmitBranch(IsInclusive
4474                    ? OMPScanReduce
4475                    : BreakContinueStack.back().ContinueBlock.getBlock());
4476     EmitBlock(OMPScanDispatch);
4477     {
4478       // New scope for correct construction/destruction of temp variables for
4479       // exclusive scan.
4480       LexicalScope Scope(*this, S.getSourceRange());
4481       EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4482       EmitBlock(OMPScanReduce);
4483       if (!IsInclusive) {
4484         // Create temp var and copy LHS value to this temp value.
4485         // TMP = LHS;
4486         for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4487           const Expr *PrivateExpr = Privates[I];
4488           const Expr *TempExpr = CopyArrayTemps[I];
4489           EmitAutoVarDecl(
4490               *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
4491           LValue DestLVal = EmitLValue(TempExpr);
4492           LValue SrcLVal = EmitLValue(LHSs[I]);
4493           EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4494                       SrcLVal.getAddress(*this),
4495                       cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4496                       cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4497                       CopyOps[I]);
4498         }
4499       }
4500       CGM.getOpenMPRuntime().emitReduction(
4501           *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
4502           {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
4503       for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4504         const Expr *PrivateExpr = Privates[I];
4505         LValue DestLVal;
4506         LValue SrcLVal;
4507         if (IsInclusive) {
4508           DestLVal = EmitLValue(RHSs[I]);
4509           SrcLVal = EmitLValue(LHSs[I]);
4510         } else {
4511           const Expr *TempExpr = CopyArrayTemps[I];
4512           DestLVal = EmitLValue(RHSs[I]);
4513           SrcLVal = EmitLValue(TempExpr);
4514         }
4515         EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4516                     SrcLVal.getAddress(*this),
4517                     cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4518                     cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4519                     CopyOps[I]);
4520       }
4521     }
4522     EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
4523     OMPScanExitBlock = IsInclusive
4524                            ? BreakContinueStack.back().ContinueBlock.getBlock()
4525                            : OMPScanReduce;
4526     EmitBlock(OMPAfterScanBlock);
4527     return;
4528   }
4529   if (!IsInclusive) {
4530     EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4531     EmitBlock(OMPScanExitBlock);
4532   }
4533   if (OMPFirstScanLoop) {
4534     // Emit buffer[i] = red; at the end of the input phase.
4535     const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4536                              .getIterationVariable()
4537                              ->IgnoreParenImpCasts();
4538     LValue IdxLVal = EmitLValue(IVExpr);
4539     llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4540     IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4541     for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4542       const Expr *PrivateExpr = Privates[I];
4543       const Expr *OrigExpr = Shareds[I];
4544       const Expr *CopyArrayElem = CopyArrayElems[I];
4545       OpaqueValueMapping IdxMapping(
4546           *this,
4547           cast<OpaqueValueExpr>(
4548               cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4549           RValue::get(IdxVal));
4550       LValue DestLVal = EmitLValue(CopyArrayElem);
4551       LValue SrcLVal = EmitLValue(OrigExpr);
4552       EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4553                   SrcLVal.getAddress(*this),
4554                   cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4555                   cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4556                   CopyOps[I]);
4557     }
4558   }
4559   EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4560   if (IsInclusive) {
4561     EmitBlock(OMPScanExitBlock);
4562     EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4563   }
4564   EmitBlock(OMPScanDispatch);
4565   if (!OMPFirstScanLoop) {
4566     // Emit red = buffer[i]; at the entrance to the scan phase.
4567     const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4568                              .getIterationVariable()
4569                              ->IgnoreParenImpCasts();
4570     LValue IdxLVal = EmitLValue(IVExpr);
4571     llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4572     IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4573     llvm::BasicBlock *ExclusiveExitBB = nullptr;
4574     if (!IsInclusive) {
4575       llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
4576       ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
4577       llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
4578       Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
4579       EmitBlock(ContBB);
4580       // Use idx - 1 iteration for exclusive scan.
4581       IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
4582     }
4583     for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4584       const Expr *PrivateExpr = Privates[I];
4585       const Expr *OrigExpr = Shareds[I];
4586       const Expr *CopyArrayElem = CopyArrayElems[I];
4587       OpaqueValueMapping IdxMapping(
4588           *this,
4589           cast<OpaqueValueExpr>(
4590               cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4591           RValue::get(IdxVal));
4592       LValue SrcLVal = EmitLValue(CopyArrayElem);
4593       LValue DestLVal = EmitLValue(OrigExpr);
4594       EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4595                   SrcLVal.getAddress(*this),
4596                   cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4597                   cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4598                   CopyOps[I]);
4599     }
4600     if (!IsInclusive) {
4601       EmitBlock(ExclusiveExitBB);
4602     }
4603   }
4604   EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
4605                                                : OMPAfterScanBlock);
4606   EmitBlock(OMPAfterScanBlock);
4607 }
4608 
4609 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
4610                                             const CodeGenLoopTy &CodeGenLoop,
4611                                             Expr *IncExpr) {
4612   // Emit the loop iteration variable.
4613   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
4614   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
4615   EmitVarDecl(*IVDecl);
4616 
4617   // Emit the iterations count variable.
4618   // If it is not a variable, Sema decided to calculate iterations count on each
4619   // iteration (e.g., it is foldable into a constant).
4620   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4621     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4622     // Emit calculation of the iterations count.
4623     EmitIgnoredExpr(S.getCalcLastIteration());
4624   }
4625 
4626   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
4627 
4628   bool HasLastprivateClause = false;
4629   // Check pre-condition.
4630   {
4631     OMPLoopScope PreInitScope(*this, S);
4632     // Skip the entire loop if we don't meet the precondition.
4633     // If the condition constant folds and can be elided, avoid emitting the
4634     // whole loop.
4635     bool CondConstant;
4636     llvm::BasicBlock *ContBlock = nullptr;
4637     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4638       if (!CondConstant)
4639         return;
4640     } else {
4641       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
4642       ContBlock = createBasicBlock("omp.precond.end");
4643       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
4644                   getProfileCount(&S));
4645       EmitBlock(ThenBlock);
4646       incrementProfileCounter(&S);
4647     }
4648 
4649     emitAlignedClause(*this, S);
4650     // Emit 'then' code.
4651     {
4652       // Emit helper vars inits.
4653 
4654       LValue LB = EmitOMPHelperVar(
4655           *this, cast<DeclRefExpr>(
4656                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4657                           ? S.getCombinedLowerBoundVariable()
4658                           : S.getLowerBoundVariable())));
4659       LValue UB = EmitOMPHelperVar(
4660           *this, cast<DeclRefExpr>(
4661                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4662                           ? S.getCombinedUpperBoundVariable()
4663                           : S.getUpperBoundVariable())));
4664       LValue ST =
4665           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
4666       LValue IL =
4667           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
4668 
4669       OMPPrivateScope LoopScope(*this);
4670       if (EmitOMPFirstprivateClause(S, LoopScope)) {
4671         // Emit implicit barrier to synchronize threads and avoid data races
4672         // on initialization of firstprivate variables and post-update of
4673         // lastprivate variables.
4674         CGM.getOpenMPRuntime().emitBarrierCall(
4675             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4676             /*ForceSimpleCall=*/true);
4677       }
4678       EmitOMPPrivateClause(S, LoopScope);
4679       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
4680           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4681           !isOpenMPTeamsDirective(S.getDirectiveKind()))
4682         EmitOMPReductionClauseInit(S, LoopScope);
4683       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
4684       EmitOMPPrivateLoopCounters(S, LoopScope);
4685       (void)LoopScope.Privatize();
4686       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4687         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
4688 
4689       // Detect the distribute schedule kind and chunk.
4690       llvm::Value *Chunk = nullptr;
4691       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
4692       if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
4693         ScheduleKind = C->getDistScheduleKind();
4694         if (const Expr *Ch = C->getChunkSize()) {
4695           Chunk = EmitScalarExpr(Ch);
4696           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
4697                                        S.getIterationVariable()->getType(),
4698                                        S.getBeginLoc());
4699         }
4700       } else {
4701         // Default behaviour for dist_schedule clause.
4702         CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
4703             *this, S, ScheduleKind, Chunk);
4704       }
4705       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
4706       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
4707 
4708       // OpenMP [2.10.8, distribute Construct, Description]
4709       // If dist_schedule is specified, kind must be static. If specified,
4710       // iterations are divided into chunks of size chunk_size, chunks are
4711       // assigned to the teams of the league in a round-robin fashion in the
4712       // order of the team number. When no chunk_size is specified, the
4713       // iteration space is divided into chunks that are approximately equal
4714       // in size, and at most one chunk is distributed to each team of the
4715       // league. The size of the chunks is unspecified in this case.
4716       bool StaticChunked = RT.isStaticChunked(
4717           ScheduleKind, /* Chunked */ Chunk != nullptr) &&
4718           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
4719       if (RT.isStaticNonchunked(ScheduleKind,
4720                                 /* Chunked */ Chunk != nullptr) ||
4721           StaticChunked) {
4722         CGOpenMPRuntime::StaticRTInput StaticInit(
4723             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
4724             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
4725             StaticChunked ? Chunk : nullptr);
4726         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
4727                                     StaticInit);
4728         JumpDest LoopExit =
4729             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
4730         // UB = min(UB, GlobalUB);
4731         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4732                             ? S.getCombinedEnsureUpperBound()
4733                             : S.getEnsureUpperBound());
4734         // IV = LB;
4735         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4736                             ? S.getCombinedInit()
4737                             : S.getInit());
4738 
4739         const Expr *Cond =
4740             isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4741                 ? S.getCombinedCond()
4742                 : S.getCond();
4743 
4744         if (StaticChunked)
4745           Cond = S.getCombinedDistCond();
4746 
4747         // For static unchunked schedules generate:
4748         //
4749         //  1. For distribute alone, codegen
4750         //    while (idx <= UB) {
4751         //      BODY;
4752         //      ++idx;
4753         //    }
4754         //
4755         //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
4756         //    while (idx <= UB) {
4757         //      <CodeGen rest of pragma>(LB, UB);
4758         //      idx += ST;
4759         //    }
4760         //
4761         // For static chunk one schedule generate:
4762         //
4763         // while (IV <= GlobalUB) {
4764         //   <CodeGen rest of pragma>(LB, UB);
4765         //   LB += ST;
4766         //   UB += ST;
4767         //   UB = min(UB, GlobalUB);
4768         //   IV = LB;
4769         // }
4770         //
4771         emitCommonSimdLoop(
4772             *this, S,
4773             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4774               if (isOpenMPSimdDirective(S.getDirectiveKind()))
4775                 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
4776             },
4777             [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
4778              StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
4779               CGF.EmitOMPInnerLoop(
4780                   S, LoopScope.requiresCleanups(), Cond, IncExpr,
4781                   [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
4782                     CodeGenLoop(CGF, S, LoopExit);
4783                   },
4784                   [&S, StaticChunked](CodeGenFunction &CGF) {
4785                     if (StaticChunked) {
4786                       CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
4787                       CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
4788                       CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
4789                       CGF.EmitIgnoredExpr(S.getCombinedInit());
4790                     }
4791                   });
4792             });
4793         EmitBlock(LoopExit.getBlock());
4794         // Tell the runtime we are done.
4795         RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
4796       } else {
4797         // Emit the outer loop, which requests its work chunk [LB..UB] from
4798         // runtime and runs the inner loop to process it.
4799         const OMPLoopArguments LoopArguments = {
4800             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
4801             IL.getAddress(*this), Chunk};
4802         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
4803                                    CodeGenLoop);
4804       }
4805       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
4806         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
4807           return CGF.Builder.CreateIsNotNull(
4808               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4809         });
4810       }
4811       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
4812           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4813           !isOpenMPTeamsDirective(S.getDirectiveKind())) {
4814         EmitOMPReductionClauseFinal(S, OMPD_simd);
4815         // Emit post-update of the reduction variables if IsLastIter != 0.
4816         emitPostUpdateForReductionClause(
4817             *this, S, [IL, &S](CodeGenFunction &CGF) {
4818               return CGF.Builder.CreateIsNotNull(
4819                   CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4820             });
4821       }
4822       // Emit final copy of the lastprivate variables if IsLastIter != 0.
4823       if (HasLastprivateClause) {
4824         EmitOMPLastprivateClauseFinal(
4825             S, /*NoFinals=*/false,
4826             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
4827       }
4828     }
4829 
4830     // We're now done with the loop, so jump to the continuation block.
4831     if (ContBlock) {
4832       EmitBranch(ContBlock);
4833       EmitBlock(ContBlock, true);
4834     }
4835   }
4836 }
4837 
4838 void CodeGenFunction::EmitOMPDistributeDirective(
4839     const OMPDistributeDirective &S) {
4840   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4841     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4842   };
4843   OMPLexicalScope Scope(*this, S, OMPD_unknown);
4844   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
4845 }
4846 
4847 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
4848                                                    const CapturedStmt *S,
4849                                                    SourceLocation Loc) {
4850   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
4851   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
4852   CGF.CapturedStmtInfo = &CapStmtInfo;
4853   llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
4854   Fn->setDoesNotRecurse();
4855   return Fn;
4856 }
4857 
4858 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
4859   if (S.hasClausesOfKind<OMPDependClause>()) {
4860     assert(!S.hasAssociatedStmt() &&
4861            "No associated statement must be in ordered depend construct.");
4862     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
4863       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
4864     return;
4865   }
4866   const auto *C = S.getSingleClause<OMPSIMDClause>();
4867   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
4868                                  PrePostActionTy &Action) {
4869     const CapturedStmt *CS = S.getInnermostCapturedStmt();
4870     if (C) {
4871       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4872       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4873       llvm::Function *OutlinedFn =
4874           emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
4875       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
4876                                                       OutlinedFn, CapturedVars);
4877     } else {
4878       Action.Enter(CGF);
4879       CGF.EmitStmt(CS->getCapturedStmt());
4880     }
4881   };
4882   OMPLexicalScope Scope(*this, S, OMPD_unknown);
4883   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
4884 }
4885 
4886 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
4887                                          QualType SrcType, QualType DestType,
4888                                          SourceLocation Loc) {
4889   assert(CGF.hasScalarEvaluationKind(DestType) &&
4890          "DestType must have scalar evaluation kind.");
4891   assert(!Val.isAggregate() && "Must be a scalar or complex.");
4892   return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
4893                                                    DestType, Loc)
4894                         : CGF.EmitComplexToScalarConversion(
4895                               Val.getComplexVal(), SrcType, DestType, Loc);
4896 }
4897 
4898 static CodeGenFunction::ComplexPairTy
4899 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
4900                       QualType DestType, SourceLocation Loc) {
4901   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
4902          "DestType must have complex evaluation kind.");
4903   CodeGenFunction::ComplexPairTy ComplexVal;
4904   if (Val.isScalar()) {
4905     // Convert the input element to the element type of the complex.
4906     QualType DestElementType =
4907         DestType->castAs<ComplexType>()->getElementType();
4908     llvm::Value *ScalarVal = CGF.EmitScalarConversion(
4909         Val.getScalarVal(), SrcType, DestElementType, Loc);
4910     ComplexVal = CodeGenFunction::ComplexPairTy(
4911         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
4912   } else {
4913     assert(Val.isComplex() && "Must be a scalar or complex.");
4914     QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
4915     QualType DestElementType =
4916         DestType->castAs<ComplexType>()->getElementType();
4917     ComplexVal.first = CGF.EmitScalarConversion(
4918         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
4919     ComplexVal.second = CGF.EmitScalarConversion(
4920         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
4921   }
4922   return ComplexVal;
4923 }
4924 
4925 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
4926                                   LValue LVal, RValue RVal) {
4927   if (LVal.isGlobalReg())
4928     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
4929   else
4930     CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
4931 }
4932 
4933 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
4934                                    llvm::AtomicOrdering AO, LValue LVal,
4935                                    SourceLocation Loc) {
4936   if (LVal.isGlobalReg())
4937     return CGF.EmitLoadOfLValue(LVal, Loc);
4938   return CGF.EmitAtomicLoad(
4939       LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
4940       LVal.isVolatile());
4941 }
4942 
4943 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
4944                                          QualType RValTy, SourceLocation Loc) {
4945   switch (getEvaluationKind(LVal.getType())) {
4946   case TEK_Scalar:
4947     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
4948                                *this, RVal, RValTy, LVal.getType(), Loc)),
4949                            LVal);
4950     break;
4951   case TEK_Complex:
4952     EmitStoreOfComplex(
4953         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
4954         /*isInit=*/false);
4955     break;
4956   case TEK_Aggregate:
4957     llvm_unreachable("Must be a scalar or complex.");
4958   }
4959 }
4960 
4961 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
4962                                   const Expr *X, const Expr *V,
4963                                   SourceLocation Loc) {
4964   // v = x;
4965   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
4966   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
4967   LValue XLValue = CGF.EmitLValue(X);
4968   LValue VLValue = CGF.EmitLValue(V);
4969   RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
4970   // OpenMP, 2.17.7, atomic Construct
4971   // If the read or capture clause is specified and the acquire, acq_rel, or
4972   // seq_cst clause is specified then the strong flush on exit from the atomic
4973   // operation is also an acquire flush.
4974   switch (AO) {
4975   case llvm::AtomicOrdering::Acquire:
4976   case llvm::AtomicOrdering::AcquireRelease:
4977   case llvm::AtomicOrdering::SequentiallyConsistent:
4978     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
4979                                          llvm::AtomicOrdering::Acquire);
4980     break;
4981   case llvm::AtomicOrdering::Monotonic:
4982   case llvm::AtomicOrdering::Release:
4983     break;
4984   case llvm::AtomicOrdering::NotAtomic:
4985   case llvm::AtomicOrdering::Unordered:
4986     llvm_unreachable("Unexpected ordering.");
4987   }
4988   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
4989   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
4990 }
4991 
4992 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
4993                                    llvm::AtomicOrdering AO, const Expr *X,
4994                                    const Expr *E, SourceLocation Loc) {
4995   // x = expr;
4996   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
4997   emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
4998   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
4999   // OpenMP, 2.17.7, atomic Construct
5000   // If the write, update, or capture clause is specified and the release,
5001   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5002   // the atomic operation is also a release flush.
5003   switch (AO) {
5004   case llvm::AtomicOrdering::Release:
5005   case llvm::AtomicOrdering::AcquireRelease:
5006   case llvm::AtomicOrdering::SequentiallyConsistent:
5007     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5008                                          llvm::AtomicOrdering::Release);
5009     break;
5010   case llvm::AtomicOrdering::Acquire:
5011   case llvm::AtomicOrdering::Monotonic:
5012     break;
5013   case llvm::AtomicOrdering::NotAtomic:
5014   case llvm::AtomicOrdering::Unordered:
5015     llvm_unreachable("Unexpected ordering.");
5016   }
5017 }
5018 
5019 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
5020                                                 RValue Update,
5021                                                 BinaryOperatorKind BO,
5022                                                 llvm::AtomicOrdering AO,
5023                                                 bool IsXLHSInRHSPart) {
5024   ASTContext &Context = CGF.getContext();
5025   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
5026   // expression is simple and atomic is allowed for the given type for the
5027   // target platform.
5028   if (BO == BO_Comma || !Update.isScalar() ||
5029       !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
5030       (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
5031        (Update.getScalarVal()->getType() !=
5032         X.getAddress(CGF).getElementType())) ||
5033       !X.getAddress(CGF).getElementType()->isIntegerTy() ||
5034       !Context.getTargetInfo().hasBuiltinAtomic(
5035           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
5036     return std::make_pair(false, RValue::get(nullptr));
5037 
5038   llvm::AtomicRMWInst::BinOp RMWOp;
5039   switch (BO) {
5040   case BO_Add:
5041     RMWOp = llvm::AtomicRMWInst::Add;
5042     break;
5043   case BO_Sub:
5044     if (!IsXLHSInRHSPart)
5045       return std::make_pair(false, RValue::get(nullptr));
5046     RMWOp = llvm::AtomicRMWInst::Sub;
5047     break;
5048   case BO_And:
5049     RMWOp = llvm::AtomicRMWInst::And;
5050     break;
5051   case BO_Or:
5052     RMWOp = llvm::AtomicRMWInst::Or;
5053     break;
5054   case BO_Xor:
5055     RMWOp = llvm::AtomicRMWInst::Xor;
5056     break;
5057   case BO_LT:
5058     RMWOp = X.getType()->hasSignedIntegerRepresentation()
5059                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
5060                                    : llvm::AtomicRMWInst::Max)
5061                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
5062                                    : llvm::AtomicRMWInst::UMax);
5063     break;
5064   case BO_GT:
5065     RMWOp = X.getType()->hasSignedIntegerRepresentation()
5066                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
5067                                    : llvm::AtomicRMWInst::Min)
5068                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
5069                                    : llvm::AtomicRMWInst::UMin);
5070     break;
5071   case BO_Assign:
5072     RMWOp = llvm::AtomicRMWInst::Xchg;
5073     break;
5074   case BO_Mul:
5075   case BO_Div:
5076   case BO_Rem:
5077   case BO_Shl:
5078   case BO_Shr:
5079   case BO_LAnd:
5080   case BO_LOr:
5081     return std::make_pair(false, RValue::get(nullptr));
5082   case BO_PtrMemD:
5083   case BO_PtrMemI:
5084   case BO_LE:
5085   case BO_GE:
5086   case BO_EQ:
5087   case BO_NE:
5088   case BO_Cmp:
5089   case BO_AddAssign:
5090   case BO_SubAssign:
5091   case BO_AndAssign:
5092   case BO_OrAssign:
5093   case BO_XorAssign:
5094   case BO_MulAssign:
5095   case BO_DivAssign:
5096   case BO_RemAssign:
5097   case BO_ShlAssign:
5098   case BO_ShrAssign:
5099   case BO_Comma:
5100     llvm_unreachable("Unsupported atomic update operation");
5101   }
5102   llvm::Value *UpdateVal = Update.getScalarVal();
5103   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
5104     UpdateVal = CGF.Builder.CreateIntCast(
5105         IC, X.getAddress(CGF).getElementType(),
5106         X.getType()->hasSignedIntegerRepresentation());
5107   }
5108   llvm::Value *Res =
5109       CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
5110   return std::make_pair(true, RValue::get(Res));
5111 }
5112 
5113 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
5114     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
5115     llvm::AtomicOrdering AO, SourceLocation Loc,
5116     const llvm::function_ref<RValue(RValue)> CommonGen) {
5117   // Update expressions are allowed to have the following forms:
5118   // x binop= expr; -> xrval + expr;
5119   // x++, ++x -> xrval + 1;
5120   // x--, --x -> xrval - 1;
5121   // x = x binop expr; -> xrval binop expr
5122   // x = expr Op x; - > expr binop xrval;
5123   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
5124   if (!Res.first) {
5125     if (X.isGlobalReg()) {
5126       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
5127       // 'xrval'.
5128       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
5129     } else {
5130       // Perform compare-and-swap procedure.
5131       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
5132     }
5133   }
5134   return Res;
5135 }
5136 
5137 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
5138                                     llvm::AtomicOrdering AO, const Expr *X,
5139                                     const Expr *E, const Expr *UE,
5140                                     bool IsXLHSInRHSPart, SourceLocation Loc) {
5141   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5142          "Update expr in 'atomic update' must be a binary operator.");
5143   const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5144   // Update expressions are allowed to have the following forms:
5145   // x binop= expr; -> xrval + expr;
5146   // x++, ++x -> xrval + 1;
5147   // x--, --x -> xrval - 1;
5148   // x = x binop expr; -> xrval binop expr
5149   // x = expr Op x; - > expr binop xrval;
5150   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
5151   LValue XLValue = CGF.EmitLValue(X);
5152   RValue ExprRValue = CGF.EmitAnyExpr(E);
5153   const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5154   const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5155   const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5156   const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5157   auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
5158     CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5159     CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5160     return CGF.EmitAnyExpr(UE);
5161   };
5162   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
5163       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5164   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5165   // OpenMP, 2.17.7, atomic Construct
5166   // If the write, update, or capture clause is specified and the release,
5167   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5168   // the atomic operation is also a release flush.
5169   switch (AO) {
5170   case llvm::AtomicOrdering::Release:
5171   case llvm::AtomicOrdering::AcquireRelease:
5172   case llvm::AtomicOrdering::SequentiallyConsistent:
5173     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5174                                          llvm::AtomicOrdering::Release);
5175     break;
5176   case llvm::AtomicOrdering::Acquire:
5177   case llvm::AtomicOrdering::Monotonic:
5178     break;
5179   case llvm::AtomicOrdering::NotAtomic:
5180   case llvm::AtomicOrdering::Unordered:
5181     llvm_unreachable("Unexpected ordering.");
5182   }
5183 }
5184 
5185 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
5186                             QualType SourceType, QualType ResType,
5187                             SourceLocation Loc) {
5188   switch (CGF.getEvaluationKind(ResType)) {
5189   case TEK_Scalar:
5190     return RValue::get(
5191         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
5192   case TEK_Complex: {
5193     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
5194     return RValue::getComplex(Res.first, Res.second);
5195   }
5196   case TEK_Aggregate:
5197     break;
5198   }
5199   llvm_unreachable("Must be a scalar or complex.");
5200 }
5201 
5202 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
5203                                      llvm::AtomicOrdering AO,
5204                                      bool IsPostfixUpdate, const Expr *V,
5205                                      const Expr *X, const Expr *E,
5206                                      const Expr *UE, bool IsXLHSInRHSPart,
5207                                      SourceLocation Loc) {
5208   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
5209   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
5210   RValue NewVVal;
5211   LValue VLValue = CGF.EmitLValue(V);
5212   LValue XLValue = CGF.EmitLValue(X);
5213   RValue ExprRValue = CGF.EmitAnyExpr(E);
5214   QualType NewVValType;
5215   if (UE) {
5216     // 'x' is updated with some additional value.
5217     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5218            "Update expr in 'atomic capture' must be a binary operator.");
5219     const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5220     // Update expressions are allowed to have the following forms:
5221     // x binop= expr; -> xrval + expr;
5222     // x++, ++x -> xrval + 1;
5223     // x--, --x -> xrval - 1;
5224     // x = x binop expr; -> xrval binop expr
5225     // x = expr Op x; - > expr binop xrval;
5226     const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5227     const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5228     const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5229     NewVValType = XRValExpr->getType();
5230     const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5231     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
5232                   IsPostfixUpdate](RValue XRValue) {
5233       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5234       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5235       RValue Res = CGF.EmitAnyExpr(UE);
5236       NewVVal = IsPostfixUpdate ? XRValue : Res;
5237       return Res;
5238     };
5239     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5240         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5241     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5242     if (Res.first) {
5243       // 'atomicrmw' instruction was generated.
5244       if (IsPostfixUpdate) {
5245         // Use old value from 'atomicrmw'.
5246         NewVVal = Res.second;
5247       } else {
5248         // 'atomicrmw' does not provide new value, so evaluate it using old
5249         // value of 'x'.
5250         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5251         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
5252         NewVVal = CGF.EmitAnyExpr(UE);
5253       }
5254     }
5255   } else {
5256     // 'x' is simply rewritten with some 'expr'.
5257     NewVValType = X->getType().getNonReferenceType();
5258     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
5259                                X->getType().getNonReferenceType(), Loc);
5260     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
5261       NewVVal = XRValue;
5262       return ExprRValue;
5263     };
5264     // Try to perform atomicrmw xchg, otherwise simple exchange.
5265     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5266         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
5267         Loc, Gen);
5268     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5269     if (Res.first) {
5270       // 'atomicrmw' instruction was generated.
5271       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
5272     }
5273   }
5274   // Emit post-update store to 'v' of old/new 'x' value.
5275   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
5276   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
5277   // OpenMP, 2.17.7, atomic Construct
5278   // If the write, update, or capture clause is specified and the release,
5279   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5280   // the atomic operation is also a release flush.
5281   // If the read or capture clause is specified and the acquire, acq_rel, or
5282   // seq_cst clause is specified then the strong flush on exit from the atomic
5283   // operation is also an acquire flush.
5284   switch (AO) {
5285   case llvm::AtomicOrdering::Release:
5286     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5287                                          llvm::AtomicOrdering::Release);
5288     break;
5289   case llvm::AtomicOrdering::Acquire:
5290     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5291                                          llvm::AtomicOrdering::Acquire);
5292     break;
5293   case llvm::AtomicOrdering::AcquireRelease:
5294   case llvm::AtomicOrdering::SequentiallyConsistent:
5295     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5296                                          llvm::AtomicOrdering::AcquireRelease);
5297     break;
5298   case llvm::AtomicOrdering::Monotonic:
5299     break;
5300   case llvm::AtomicOrdering::NotAtomic:
5301   case llvm::AtomicOrdering::Unordered:
5302     llvm_unreachable("Unexpected ordering.");
5303   }
5304 }
5305 
5306 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
5307                               llvm::AtomicOrdering AO, bool IsPostfixUpdate,
5308                               const Expr *X, const Expr *V, const Expr *E,
5309                               const Expr *UE, bool IsXLHSInRHSPart,
5310                               SourceLocation Loc) {
5311   switch (Kind) {
5312   case OMPC_read:
5313     emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
5314     break;
5315   case OMPC_write:
5316     emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
5317     break;
5318   case OMPC_unknown:
5319   case OMPC_update:
5320     emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
5321     break;
5322   case OMPC_capture:
5323     emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
5324                              IsXLHSInRHSPart, Loc);
5325     break;
5326   case OMPC_if:
5327   case OMPC_final:
5328   case OMPC_num_threads:
5329   case OMPC_private:
5330   case OMPC_firstprivate:
5331   case OMPC_lastprivate:
5332   case OMPC_reduction:
5333   case OMPC_task_reduction:
5334   case OMPC_in_reduction:
5335   case OMPC_safelen:
5336   case OMPC_simdlen:
5337   case OMPC_allocator:
5338   case OMPC_allocate:
5339   case OMPC_collapse:
5340   case OMPC_default:
5341   case OMPC_seq_cst:
5342   case OMPC_acq_rel:
5343   case OMPC_acquire:
5344   case OMPC_release:
5345   case OMPC_relaxed:
5346   case OMPC_shared:
5347   case OMPC_linear:
5348   case OMPC_aligned:
5349   case OMPC_copyin:
5350   case OMPC_copyprivate:
5351   case OMPC_flush:
5352   case OMPC_depobj:
5353   case OMPC_proc_bind:
5354   case OMPC_schedule:
5355   case OMPC_ordered:
5356   case OMPC_nowait:
5357   case OMPC_untied:
5358   case OMPC_threadprivate:
5359   case OMPC_depend:
5360   case OMPC_mergeable:
5361   case OMPC_device:
5362   case OMPC_threads:
5363   case OMPC_simd:
5364   case OMPC_map:
5365   case OMPC_num_teams:
5366   case OMPC_thread_limit:
5367   case OMPC_priority:
5368   case OMPC_grainsize:
5369   case OMPC_nogroup:
5370   case OMPC_num_tasks:
5371   case OMPC_hint:
5372   case OMPC_dist_schedule:
5373   case OMPC_defaultmap:
5374   case OMPC_uniform:
5375   case OMPC_to:
5376   case OMPC_from:
5377   case OMPC_use_device_ptr:
5378   case OMPC_use_device_addr:
5379   case OMPC_is_device_ptr:
5380   case OMPC_unified_address:
5381   case OMPC_unified_shared_memory:
5382   case OMPC_reverse_offload:
5383   case OMPC_dynamic_allocators:
5384   case OMPC_atomic_default_mem_order:
5385   case OMPC_device_type:
5386   case OMPC_match:
5387   case OMPC_nontemporal:
5388   case OMPC_order:
5389   case OMPC_destroy:
5390   case OMPC_detach:
5391   case OMPC_inclusive:
5392   case OMPC_exclusive:
5393   case OMPC_uses_allocators:
5394   case OMPC_affinity:
5395   default:
5396     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
5397   }
5398 }
5399 
5400 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
5401   llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
5402   bool MemOrderingSpecified = false;
5403   if (S.getSingleClause<OMPSeqCstClause>()) {
5404     AO = llvm::AtomicOrdering::SequentiallyConsistent;
5405     MemOrderingSpecified = true;
5406   } else if (S.getSingleClause<OMPAcqRelClause>()) {
5407     AO = llvm::AtomicOrdering::AcquireRelease;
5408     MemOrderingSpecified = true;
5409   } else if (S.getSingleClause<OMPAcquireClause>()) {
5410     AO = llvm::AtomicOrdering::Acquire;
5411     MemOrderingSpecified = true;
5412   } else if (S.getSingleClause<OMPReleaseClause>()) {
5413     AO = llvm::AtomicOrdering::Release;
5414     MemOrderingSpecified = true;
5415   } else if (S.getSingleClause<OMPRelaxedClause>()) {
5416     AO = llvm::AtomicOrdering::Monotonic;
5417     MemOrderingSpecified = true;
5418   }
5419   OpenMPClauseKind Kind = OMPC_unknown;
5420   for (const OMPClause *C : S.clauses()) {
5421     // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
5422     // if it is first).
5423     if (C->getClauseKind() != OMPC_seq_cst &&
5424         C->getClauseKind() != OMPC_acq_rel &&
5425         C->getClauseKind() != OMPC_acquire &&
5426         C->getClauseKind() != OMPC_release &&
5427         C->getClauseKind() != OMPC_relaxed) {
5428       Kind = C->getClauseKind();
5429       break;
5430     }
5431   }
5432   if (!MemOrderingSpecified) {
5433     llvm::AtomicOrdering DefaultOrder =
5434         CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
5435     if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
5436         DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
5437         (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
5438          Kind == OMPC_capture)) {
5439       AO = DefaultOrder;
5440     } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
5441       if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
5442         AO = llvm::AtomicOrdering::Release;
5443       } else if (Kind == OMPC_read) {
5444         assert(Kind == OMPC_read && "Unexpected atomic kind.");
5445         AO = llvm::AtomicOrdering::Acquire;
5446       }
5447     }
5448   }
5449 
5450   LexicalScope Scope(*this, S.getSourceRange());
5451   EmitStopPoint(S.getAssociatedStmt());
5452   emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
5453                     S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
5454                     S.getBeginLoc());
5455 }
5456 
5457 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
5458                                          const OMPExecutableDirective &S,
5459                                          const RegionCodeGenTy &CodeGen) {
5460   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
5461   CodeGenModule &CGM = CGF.CGM;
5462 
5463   // On device emit this construct as inlined code.
5464   if (CGM.getLangOpts().OpenMPIsDevice) {
5465     OMPLexicalScope Scope(CGF, S, OMPD_target);
5466     CGM.getOpenMPRuntime().emitInlinedDirective(
5467         CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5468           CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5469         });
5470     return;
5471   }
5472 
5473   auto LPCRegion =
5474       CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
5475   llvm::Function *Fn = nullptr;
5476   llvm::Constant *FnID = nullptr;
5477 
5478   const Expr *IfCond = nullptr;
5479   // Check for the at most one if clause associated with the target region.
5480   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5481     if (C->getNameModifier() == OMPD_unknown ||
5482         C->getNameModifier() == OMPD_target) {
5483       IfCond = C->getCondition();
5484       break;
5485     }
5486   }
5487 
5488   // Check if we have any device clause associated with the directive.
5489   llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
5490       nullptr, OMPC_DEVICE_unknown);
5491   if (auto *C = S.getSingleClause<OMPDeviceClause>())
5492     Device.setPointerAndInt(C->getDevice(), C->getModifier());
5493 
5494   // Check if we have an if clause whose conditional always evaluates to false
5495   // or if we do not have any targets specified. If so the target region is not
5496   // an offload entry point.
5497   bool IsOffloadEntry = true;
5498   if (IfCond) {
5499     bool Val;
5500     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
5501       IsOffloadEntry = false;
5502   }
5503   if (CGM.getLangOpts().OMPTargetTriples.empty())
5504     IsOffloadEntry = false;
5505 
5506   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
5507   StringRef ParentName;
5508   // In case we have Ctors/Dtors we use the complete type variant to produce
5509   // the mangling of the device outlined kernel.
5510   if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
5511     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
5512   else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
5513     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
5514   else
5515     ParentName =
5516         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
5517 
5518   // Emit target region as a standalone region.
5519   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
5520                                                     IsOffloadEntry, CodeGen);
5521   OMPLexicalScope Scope(CGF, S, OMPD_task);
5522   auto &&SizeEmitter =
5523       [IsOffloadEntry](CodeGenFunction &CGF,
5524                        const OMPLoopDirective &D) -> llvm::Value * {
5525     if (IsOffloadEntry) {
5526       OMPLoopScope(CGF, D);
5527       // Emit calculation of the iterations count.
5528       llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
5529       NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
5530                                                 /*isSigned=*/false);
5531       return NumIterations;
5532     }
5533     return nullptr;
5534   };
5535   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
5536                                         SizeEmitter);
5537 }
5538 
5539 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
5540                              PrePostActionTy &Action) {
5541   Action.Enter(CGF);
5542   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5543   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5544   CGF.EmitOMPPrivateClause(S, PrivateScope);
5545   (void)PrivateScope.Privatize();
5546   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5547     CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
5548 
5549   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
5550 }
5551 
5552 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
5553                                                   StringRef ParentName,
5554                                                   const OMPTargetDirective &S) {
5555   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5556     emitTargetRegion(CGF, S, Action);
5557   };
5558   llvm::Function *Fn;
5559   llvm::Constant *Addr;
5560   // Emit target region as a standalone region.
5561   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5562       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5563   assert(Fn && Addr && "Target device function emission failed.");
5564 }
5565 
5566 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
5567   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5568     emitTargetRegion(CGF, S, Action);
5569   };
5570   emitCommonOMPTargetDirective(*this, S, CodeGen);
5571 }
5572 
5573 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
5574                                         const OMPExecutableDirective &S,
5575                                         OpenMPDirectiveKind InnermostKind,
5576                                         const RegionCodeGenTy &CodeGen) {
5577   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
5578   llvm::Function *OutlinedFn =
5579       CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
5580           S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
5581 
5582   const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
5583   const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5584   if (NT || TL) {
5585     const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
5586     const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
5587 
5588     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
5589                                                   S.getBeginLoc());
5590   }
5591 
5592   OMPTeamsScope Scope(CGF, S);
5593   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5594   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5595   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
5596                                            CapturedVars);
5597 }
5598 
5599 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
5600   // Emit teams region as a standalone region.
5601   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5602     Action.Enter(CGF);
5603     OMPPrivateScope PrivateScope(CGF);
5604     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5605     CGF.EmitOMPPrivateClause(S, PrivateScope);
5606     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5607     (void)PrivateScope.Privatize();
5608     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
5609     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5610   };
5611   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
5612   emitPostUpdateForReductionClause(*this, S,
5613                                    [](CodeGenFunction &) { return nullptr; });
5614 }
5615 
5616 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
5617                                   const OMPTargetTeamsDirective &S) {
5618   auto *CS = S.getCapturedStmt(OMPD_teams);
5619   Action.Enter(CGF);
5620   // Emit teams region as a standalone region.
5621   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
5622     Action.Enter(CGF);
5623     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5624     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5625     CGF.EmitOMPPrivateClause(S, PrivateScope);
5626     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5627     (void)PrivateScope.Privatize();
5628     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5629       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
5630     CGF.EmitStmt(CS->getCapturedStmt());
5631     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5632   };
5633   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
5634   emitPostUpdateForReductionClause(CGF, S,
5635                                    [](CodeGenFunction &) { return nullptr; });
5636 }
5637 
5638 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
5639     CodeGenModule &CGM, StringRef ParentName,
5640     const OMPTargetTeamsDirective &S) {
5641   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5642     emitTargetTeamsRegion(CGF, Action, S);
5643   };
5644   llvm::Function *Fn;
5645   llvm::Constant *Addr;
5646   // Emit target region as a standalone region.
5647   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5648       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5649   assert(Fn && Addr && "Target device function emission failed.");
5650 }
5651 
5652 void CodeGenFunction::EmitOMPTargetTeamsDirective(
5653     const OMPTargetTeamsDirective &S) {
5654   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5655     emitTargetTeamsRegion(CGF, Action, S);
5656   };
5657   emitCommonOMPTargetDirective(*this, S, CodeGen);
5658 }
5659 
5660 static void
5661 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
5662                                 const OMPTargetTeamsDistributeDirective &S) {
5663   Action.Enter(CGF);
5664   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5665     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5666   };
5667 
5668   // Emit teams region as a standalone region.
5669   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5670                                             PrePostActionTy &Action) {
5671     Action.Enter(CGF);
5672     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5673     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5674     (void)PrivateScope.Privatize();
5675     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5676                                                     CodeGenDistribute);
5677     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5678   };
5679   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
5680   emitPostUpdateForReductionClause(CGF, S,
5681                                    [](CodeGenFunction &) { return nullptr; });
5682 }
5683 
5684 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
5685     CodeGenModule &CGM, StringRef ParentName,
5686     const OMPTargetTeamsDistributeDirective &S) {
5687   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5688     emitTargetTeamsDistributeRegion(CGF, Action, S);
5689   };
5690   llvm::Function *Fn;
5691   llvm::Constant *Addr;
5692   // Emit target region as a standalone region.
5693   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5694       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5695   assert(Fn && Addr && "Target device function emission failed.");
5696 }
5697 
5698 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
5699     const OMPTargetTeamsDistributeDirective &S) {
5700   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5701     emitTargetTeamsDistributeRegion(CGF, Action, S);
5702   };
5703   emitCommonOMPTargetDirective(*this, S, CodeGen);
5704 }
5705 
5706 static void emitTargetTeamsDistributeSimdRegion(
5707     CodeGenFunction &CGF, PrePostActionTy &Action,
5708     const OMPTargetTeamsDistributeSimdDirective &S) {
5709   Action.Enter(CGF);
5710   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5711     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5712   };
5713 
5714   // Emit teams region as a standalone region.
5715   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5716                                             PrePostActionTy &Action) {
5717     Action.Enter(CGF);
5718     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5719     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5720     (void)PrivateScope.Privatize();
5721     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5722                                                     CodeGenDistribute);
5723     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5724   };
5725   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
5726   emitPostUpdateForReductionClause(CGF, S,
5727                                    [](CodeGenFunction &) { return nullptr; });
5728 }
5729 
5730 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
5731     CodeGenModule &CGM, StringRef ParentName,
5732     const OMPTargetTeamsDistributeSimdDirective &S) {
5733   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5734     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
5735   };
5736   llvm::Function *Fn;
5737   llvm::Constant *Addr;
5738   // Emit target region as a standalone region.
5739   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5740       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5741   assert(Fn && Addr && "Target device function emission failed.");
5742 }
5743 
5744 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
5745     const OMPTargetTeamsDistributeSimdDirective &S) {
5746   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5747     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
5748   };
5749   emitCommonOMPTargetDirective(*this, S, CodeGen);
5750 }
5751 
5752 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
5753     const OMPTeamsDistributeDirective &S) {
5754 
5755   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5756     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5757   };
5758 
5759   // Emit teams region as a standalone region.
5760   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5761                                             PrePostActionTy &Action) {
5762     Action.Enter(CGF);
5763     OMPPrivateScope PrivateScope(CGF);
5764     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5765     (void)PrivateScope.Privatize();
5766     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5767                                                     CodeGenDistribute);
5768     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5769   };
5770   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
5771   emitPostUpdateForReductionClause(*this, S,
5772                                    [](CodeGenFunction &) { return nullptr; });
5773 }
5774 
5775 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
5776     const OMPTeamsDistributeSimdDirective &S) {
5777   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5778     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5779   };
5780 
5781   // Emit teams region as a standalone region.
5782   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5783                                             PrePostActionTy &Action) {
5784     Action.Enter(CGF);
5785     OMPPrivateScope PrivateScope(CGF);
5786     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5787     (void)PrivateScope.Privatize();
5788     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
5789                                                     CodeGenDistribute);
5790     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5791   };
5792   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
5793   emitPostUpdateForReductionClause(*this, S,
5794                                    [](CodeGenFunction &) { return nullptr; });
5795 }
5796 
5797 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
5798     const OMPTeamsDistributeParallelForDirective &S) {
5799   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5800     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5801                               S.getDistInc());
5802   };
5803 
5804   // Emit teams region as a standalone region.
5805   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5806                                             PrePostActionTy &Action) {
5807     Action.Enter(CGF);
5808     OMPPrivateScope PrivateScope(CGF);
5809     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5810     (void)PrivateScope.Privatize();
5811     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5812                                                     CodeGenDistribute);
5813     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5814   };
5815   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
5816   emitPostUpdateForReductionClause(*this, S,
5817                                    [](CodeGenFunction &) { return nullptr; });
5818 }
5819 
5820 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
5821     const OMPTeamsDistributeParallelForSimdDirective &S) {
5822   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5823     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5824                               S.getDistInc());
5825   };
5826 
5827   // Emit teams region as a standalone region.
5828   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5829                                             PrePostActionTy &Action) {
5830     Action.Enter(CGF);
5831     OMPPrivateScope PrivateScope(CGF);
5832     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5833     (void)PrivateScope.Privatize();
5834     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5835         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5836     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5837   };
5838   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
5839                               CodeGen);
5840   emitPostUpdateForReductionClause(*this, S,
5841                                    [](CodeGenFunction &) { return nullptr; });
5842 }
5843 
5844 static void emitTargetTeamsDistributeParallelForRegion(
5845     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
5846     PrePostActionTy &Action) {
5847   Action.Enter(CGF);
5848   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5849     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5850                               S.getDistInc());
5851   };
5852 
5853   // Emit teams region as a standalone region.
5854   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5855                                                  PrePostActionTy &Action) {
5856     Action.Enter(CGF);
5857     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5858     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5859     (void)PrivateScope.Privatize();
5860     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5861         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5862     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5863   };
5864 
5865   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
5866                               CodeGenTeams);
5867   emitPostUpdateForReductionClause(CGF, S,
5868                                    [](CodeGenFunction &) { return nullptr; });
5869 }
5870 
5871 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
5872     CodeGenModule &CGM, StringRef ParentName,
5873     const OMPTargetTeamsDistributeParallelForDirective &S) {
5874   // Emit SPMD target teams distribute parallel for region as a standalone
5875   // region.
5876   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5877     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
5878   };
5879   llvm::Function *Fn;
5880   llvm::Constant *Addr;
5881   // Emit target region as a standalone region.
5882   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5883       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5884   assert(Fn && Addr && "Target device function emission failed.");
5885 }
5886 
5887 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
5888     const OMPTargetTeamsDistributeParallelForDirective &S) {
5889   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5890     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
5891   };
5892   emitCommonOMPTargetDirective(*this, S, CodeGen);
5893 }
5894 
5895 static void emitTargetTeamsDistributeParallelForSimdRegion(
5896     CodeGenFunction &CGF,
5897     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
5898     PrePostActionTy &Action) {
5899   Action.Enter(CGF);
5900   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5901     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5902                               S.getDistInc());
5903   };
5904 
5905   // Emit teams region as a standalone region.
5906   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5907                                                  PrePostActionTy &Action) {
5908     Action.Enter(CGF);
5909     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5910     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5911     (void)PrivateScope.Privatize();
5912     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5913         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5914     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5915   };
5916 
5917   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
5918                               CodeGenTeams);
5919   emitPostUpdateForReductionClause(CGF, S,
5920                                    [](CodeGenFunction &) { return nullptr; });
5921 }
5922 
5923 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
5924     CodeGenModule &CGM, StringRef ParentName,
5925     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
5926   // Emit SPMD target teams distribute parallel for simd region as a standalone
5927   // region.
5928   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5929     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
5930   };
5931   llvm::Function *Fn;
5932   llvm::Constant *Addr;
5933   // Emit target region as a standalone region.
5934   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5935       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5936   assert(Fn && Addr && "Target device function emission failed.");
5937 }
5938 
5939 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
5940     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
5941   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5942     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
5943   };
5944   emitCommonOMPTargetDirective(*this, S, CodeGen);
5945 }
5946 
5947 void CodeGenFunction::EmitOMPCancellationPointDirective(
5948     const OMPCancellationPointDirective &S) {
5949   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
5950                                                    S.getCancelRegion());
5951 }
5952 
5953 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
5954   const Expr *IfCond = nullptr;
5955   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5956     if (C->getNameModifier() == OMPD_unknown ||
5957         C->getNameModifier() == OMPD_cancel) {
5958       IfCond = C->getCondition();
5959       break;
5960     }
5961   }
5962   if (CGM.getLangOpts().OpenMPIRBuilder) {
5963     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5964     // TODO: This check is necessary as we only generate `omp parallel` through
5965     // the OpenMPIRBuilder for now.
5966     if (S.getCancelRegion() == OMPD_parallel) {
5967       llvm::Value *IfCondition = nullptr;
5968       if (IfCond)
5969         IfCondition = EmitScalarExpr(IfCond,
5970                                      /*IgnoreResultAssign=*/true);
5971       return Builder.restoreIP(
5972           OMPBuilder.CreateCancel(Builder, IfCondition, S.getCancelRegion()));
5973     }
5974   }
5975 
5976   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
5977                                         S.getCancelRegion());
5978 }
5979 
5980 CodeGenFunction::JumpDest
5981 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
5982   if (Kind == OMPD_parallel || Kind == OMPD_task ||
5983       Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
5984       Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
5985     return ReturnBlock;
5986   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
5987          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
5988          Kind == OMPD_distribute_parallel_for ||
5989          Kind == OMPD_target_parallel_for ||
5990          Kind == OMPD_teams_distribute_parallel_for ||
5991          Kind == OMPD_target_teams_distribute_parallel_for);
5992   return OMPCancelStack.getExitBlock();
5993 }
5994 
5995 void CodeGenFunction::EmitOMPUseDevicePtrClause(
5996     const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
5997     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
5998   auto OrigVarIt = C.varlist_begin();
5999   auto InitIt = C.inits().begin();
6000   for (const Expr *PvtVarIt : C.private_copies()) {
6001     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
6002     const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
6003     const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
6004 
6005     // In order to identify the right initializer we need to match the
6006     // declaration used by the mapping logic. In some cases we may get
6007     // OMPCapturedExprDecl that refers to the original declaration.
6008     const ValueDecl *MatchingVD = OrigVD;
6009     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6010       // OMPCapturedExprDecl are used to privative fields of the current
6011       // structure.
6012       const auto *ME = cast<MemberExpr>(OED->getInit());
6013       assert(isa<CXXThisExpr>(ME->getBase()) &&
6014              "Base should be the current struct!");
6015       MatchingVD = ME->getMemberDecl();
6016     }
6017 
6018     // If we don't have information about the current list item, move on to
6019     // the next one.
6020     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6021     if (InitAddrIt == CaptureDeviceAddrMap.end())
6022       continue;
6023 
6024     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
6025                                                          InitAddrIt, InitVD,
6026                                                          PvtVD]() {
6027       // Initialize the temporary initialization variable with the address we
6028       // get from the runtime library. We have to cast the source address
6029       // because it is always a void *. References are materialized in the
6030       // privatization scope, so the initialization here disregards the fact
6031       // the original variable is a reference.
6032       QualType AddrQTy =
6033           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
6034       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
6035       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
6036       setAddrOfLocalVar(InitVD, InitAddr);
6037 
6038       // Emit private declaration, it will be initialized by the value we
6039       // declaration we just added to the local declarations map.
6040       EmitDecl(*PvtVD);
6041 
6042       // The initialization variables reached its purpose in the emission
6043       // of the previous declaration, so we don't need it anymore.
6044       LocalDeclMap.erase(InitVD);
6045 
6046       // Return the address of the private variable.
6047       return GetAddrOfLocalVar(PvtVD);
6048     });
6049     assert(IsRegistered && "firstprivate var already registered as private");
6050     // Silence the warning about unused variable.
6051     (void)IsRegistered;
6052 
6053     ++OrigVarIt;
6054     ++InitIt;
6055   }
6056 }
6057 
6058 static const VarDecl *getBaseDecl(const Expr *Ref) {
6059   const Expr *Base = Ref->IgnoreParenImpCasts();
6060   while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
6061     Base = OASE->getBase()->IgnoreParenImpCasts();
6062   while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
6063     Base = ASE->getBase()->IgnoreParenImpCasts();
6064   return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
6065 }
6066 
6067 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
6068     const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
6069     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6070   llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
6071   for (const Expr *Ref : C.varlists()) {
6072     const VarDecl *OrigVD = getBaseDecl(Ref);
6073     if (!Processed.insert(OrigVD).second)
6074       continue;
6075     // In order to identify the right initializer we need to match the
6076     // declaration used by the mapping logic. In some cases we may get
6077     // OMPCapturedExprDecl that refers to the original declaration.
6078     const ValueDecl *MatchingVD = OrigVD;
6079     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6080       // OMPCapturedExprDecl are used to privative fields of the current
6081       // structure.
6082       const auto *ME = cast<MemberExpr>(OED->getInit());
6083       assert(isa<CXXThisExpr>(ME->getBase()) &&
6084              "Base should be the current struct!");
6085       MatchingVD = ME->getMemberDecl();
6086     }
6087 
6088     // If we don't have information about the current list item, move on to
6089     // the next one.
6090     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6091     if (InitAddrIt == CaptureDeviceAddrMap.end())
6092       continue;
6093 
6094     Address PrivAddr = InitAddrIt->getSecond();
6095     // For declrefs and variable length array need to load the pointer for
6096     // correct mapping, since the pointer to the data was passed to the runtime.
6097     if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
6098         MatchingVD->getType()->isArrayType())
6099       PrivAddr =
6100           EmitLoadOfPointer(PrivAddr, getContext()
6101                                           .getPointerType(OrigVD->getType())
6102                                           ->castAs<PointerType>());
6103     llvm::Type *RealTy =
6104         ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
6105             ->getPointerTo();
6106     PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
6107 
6108     (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
6109   }
6110 }
6111 
// Generate the instructions for '#pragma omp target data' directive.
//
// The region body is emitted through a chain of code generators: the outer
// one (CodeGen) forwards its action to PrivCodeGen, which decides whether the
// use_device_ptr/use_device_addr list items must be privatized before the
// body (InnermostCodeGen) is emitted. When no offload target triples are
// configured, only the region is emitted and no data mapping calls are
// generated.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  // Action whose Enter() hook sets the referenced flag to true.
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emits the statement captured by the innermost captured region.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      // Reset the flag; only the action's Enter() below may turn it back on.
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        // Emit all instances of the use_device_addr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // No privatization requested: emit the body inside a plain lexical
        // scope for the directive.
        OMPLexicalScope Scope(CGF, S, OMPD_unknown);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    // PrivAction has not been attached to RCG at this point.
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
6206 
6207 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
6208     const OMPTargetEnterDataDirective &S) {
6209   // If we don't have target devices, don't bother emitting the data mapping
6210   // code.
6211   if (CGM.getLangOpts().OMPTargetTriples.empty())
6212     return;
6213 
6214   // Check if we have any if clause associated with the directive.
6215   const Expr *IfCond = nullptr;
6216   if (const auto *C = S.getSingleClause<OMPIfClause>())
6217     IfCond = C->getCondition();
6218 
6219   // Check if we have any device clause associated with the directive.
6220   const Expr *Device = nullptr;
6221   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6222     Device = C->getDevice();
6223 
6224   OMPLexicalScope Scope(*this, S, OMPD_task);
6225   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6226 }
6227 
6228 void CodeGenFunction::EmitOMPTargetExitDataDirective(
6229     const OMPTargetExitDataDirective &S) {
6230   // If we don't have target devices, don't bother emitting the data mapping
6231   // code.
6232   if (CGM.getLangOpts().OMPTargetTriples.empty())
6233     return;
6234 
6235   // Check if we have any if clause associated with the directive.
6236   const Expr *IfCond = nullptr;
6237   if (const auto *C = S.getSingleClause<OMPIfClause>())
6238     IfCond = C->getCondition();
6239 
6240   // Check if we have any device clause associated with the directive.
6241   const Expr *Device = nullptr;
6242   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6243     Device = C->getDevice();
6244 
6245   OMPLexicalScope Scope(*this, S, OMPD_task);
6246   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6247 }
6248 
6249 static void emitTargetParallelRegion(CodeGenFunction &CGF,
6250                                      const OMPTargetParallelDirective &S,
6251                                      PrePostActionTy &Action) {
6252   // Get the captured statement associated with the 'parallel' region.
6253   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
6254   Action.Enter(CGF);
6255   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6256     Action.Enter(CGF);
6257     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6258     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6259     CGF.EmitOMPPrivateClause(S, PrivateScope);
6260     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6261     (void)PrivateScope.Privatize();
6262     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6263       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6264     // TODO: Add support for clauses.
6265     CGF.EmitStmt(CS->getCapturedStmt());
6266     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
6267   };
6268   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
6269                                  emitEmptyBoundParameters);
6270   emitPostUpdateForReductionClause(CGF, S,
6271                                    [](CodeGenFunction &) { return nullptr; });
6272 }
6273 
6274 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
6275     CodeGenModule &CGM, StringRef ParentName,
6276     const OMPTargetParallelDirective &S) {
6277   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6278     emitTargetParallelRegion(CGF, S, Action);
6279   };
6280   llvm::Function *Fn;
6281   llvm::Constant *Addr;
6282   // Emit target region as a standalone region.
6283   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6284       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6285   assert(Fn && Addr && "Target device function emission failed.");
6286 }
6287 
6288 void CodeGenFunction::EmitOMPTargetParallelDirective(
6289     const OMPTargetParallelDirective &S) {
6290   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6291     emitTargetParallelRegion(CGF, S, Action);
6292   };
6293   emitCommonOMPTargetDirective(*this, S, CodeGen);
6294 }
6295 
6296 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
6297                                         const OMPTargetParallelForDirective &S,
6298                                         PrePostActionTy &Action) {
6299   Action.Enter(CGF);
6300   // Emit directive as a combined directive that consists of two implicit
6301   // directives: 'parallel' with 'for' directive.
6302   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6303     Action.Enter(CGF);
6304     CodeGenFunction::OMPCancelStackRAII CancelRegion(
6305         CGF, OMPD_target_parallel_for, S.hasCancel());
6306     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
6307                                emitDispatchForLoopBounds);
6308   };
6309   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
6310                                  emitEmptyBoundParameters);
6311 }
6312 
6313 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
6314     CodeGenModule &CGM, StringRef ParentName,
6315     const OMPTargetParallelForDirective &S) {
6316   // Emit SPMD target parallel for region as a standalone region.
6317   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6318     emitTargetParallelForRegion(CGF, S, Action);
6319   };
6320   llvm::Function *Fn;
6321   llvm::Constant *Addr;
6322   // Emit target region as a standalone region.
6323   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6324       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6325   assert(Fn && Addr && "Target device function emission failed.");
6326 }
6327 
6328 void CodeGenFunction::EmitOMPTargetParallelForDirective(
6329     const OMPTargetParallelForDirective &S) {
6330   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6331     emitTargetParallelForRegion(CGF, S, Action);
6332   };
6333   emitCommonOMPTargetDirective(*this, S, CodeGen);
6334 }
6335 
6336 static void
6337 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
6338                                 const OMPTargetParallelForSimdDirective &S,
6339                                 PrePostActionTy &Action) {
6340   Action.Enter(CGF);
6341   // Emit directive as a combined directive that consists of two implicit
6342   // directives: 'parallel' with 'for' directive.
6343   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6344     Action.Enter(CGF);
6345     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
6346                                emitDispatchForLoopBounds);
6347   };
6348   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
6349                                  emitEmptyBoundParameters);
6350 }
6351 
6352 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
6353     CodeGenModule &CGM, StringRef ParentName,
6354     const OMPTargetParallelForSimdDirective &S) {
6355   // Emit SPMD target parallel for region as a standalone region.
6356   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6357     emitTargetParallelForSimdRegion(CGF, S, Action);
6358   };
6359   llvm::Function *Fn;
6360   llvm::Constant *Addr;
6361   // Emit target region as a standalone region.
6362   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6363       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6364   assert(Fn && Addr && "Target device function emission failed.");
6365 }
6366 
6367 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
6368     const OMPTargetParallelForSimdDirective &S) {
6369   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6370     emitTargetParallelForSimdRegion(CGF, S, Action);
6371   };
6372   emitCommonOMPTargetDirective(*this, S, CodeGen);
6373 }
6374 
6375 /// Emit a helper variable and return corresponding lvalue.
6376 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
6377                      const ImplicitParamDecl *PVD,
6378                      CodeGenFunction::OMPPrivateScope &Privates) {
6379   const auto *VDecl = cast<VarDecl>(Helper->getDecl());
6380   Privates.addPrivate(VDecl,
6381                       [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
6382 }
6383 
/// Common emission for taskloop-based directives.
///
/// Builds the argument for the 'taskloop' captured statement, collects the
/// applicable 'if', 'nogroup', 'grainsize' and 'num_tasks' clause data, and
/// emits the task through EmitOMPTaskBasedDirective using two callbacks:
/// BodyGen emits the loop executed by the outlined task function, TaskGen
/// emits the runtime taskloop call. Unless 'nogroup' is present, the whole
/// emission is wrapped in a taskgroup region.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    // The lexical scope only needs to be alive while the captured statement
    // argument is generated.
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Use the first 'if' clause that has no name modifier or that names
  // 'taskloop'.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop
  // The integer part of Data.Schedule tells the two clauses apart (false for
  // grainsize, true for num_tasks); the pointer part holds the emitted value.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Emits the loop for the taskloop body.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // The enumerators are positions in the captured declaration's parameter
    // list, counted from param_begin().
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    // Bind the directive's helper variables to those parameters.
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          // Simd initialization is only emitted for simd directives.
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          // Emits the inner loop with the loop body.
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    // The final linear clause updates use the same "last iteration parameter
    // is not null" condition.
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  // Emits the runtime taskloop call for the outlined function, as an inlined
  // 'taskloop' directive.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  // With 'nogroup' the task is emitted directly; otherwise it is emitted
  // inside a taskgroup region.
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}
6544 
6545 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
6546   auto LPCRegion =
6547       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6548   EmitOMPTaskLoopBasedDirective(S);
6549 }
6550 
6551 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
6552     const OMPTaskLoopSimdDirective &S) {
6553   auto LPCRegion =
6554       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6555   OMPLexicalScope Scope(*this, S);
6556   EmitOMPTaskLoopBasedDirective(S);
6557 }
6558 
6559 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
6560     const OMPMasterTaskLoopDirective &S) {
6561   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6562     Action.Enter(CGF);
6563     EmitOMPTaskLoopBasedDirective(S);
6564   };
6565   auto LPCRegion =
6566       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6567   OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
6568   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
6569 }
6570 
6571 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
6572     const OMPMasterTaskLoopSimdDirective &S) {
6573   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6574     Action.Enter(CGF);
6575     EmitOMPTaskLoopBasedDirective(S);
6576   };
6577   auto LPCRegion =
6578       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6579   OMPLexicalScope Scope(*this, S);
6580   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
6581 }
6582 
6583 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
6584     const OMPParallelMasterTaskLoopDirective &S) {
6585   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6586     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
6587                                   PrePostActionTy &Action) {
6588       Action.Enter(CGF);
6589       CGF.EmitOMPTaskLoopBasedDirective(S);
6590     };
6591     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
6592     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
6593                                             S.getBeginLoc());
6594   };
6595   auto LPCRegion =
6596       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6597   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
6598                                  emitEmptyBoundParameters);
6599 }
6600 
6601 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
6602     const OMPParallelMasterTaskLoopSimdDirective &S) {
6603   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6604     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
6605                                   PrePostActionTy &Action) {
6606       Action.Enter(CGF);
6607       CGF.EmitOMPTaskLoopBasedDirective(S);
6608     };
6609     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
6610     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
6611                                             S.getBeginLoc());
6612   };
6613   auto LPCRegion =
6614       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6615   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
6616                                  emitEmptyBoundParameters);
6617 }
6618 
6619 // Generate the instructions for '#pragma omp target update' directive.
6620 void CodeGenFunction::EmitOMPTargetUpdateDirective(
6621     const OMPTargetUpdateDirective &S) {
6622   // If we don't have target devices, don't bother emitting the data mapping
6623   // code.
6624   if (CGM.getLangOpts().OMPTargetTriples.empty())
6625     return;
6626 
6627   // Check if we have any if clause associated with the directive.
6628   const Expr *IfCond = nullptr;
6629   if (const auto *C = S.getSingleClause<OMPIfClause>())
6630     IfCond = C->getCondition();
6631 
6632   // Check if we have any device clause associated with the directive.
6633   const Expr *Device = nullptr;
6634   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6635     Device = C->getDevice();
6636 
6637   OMPLexicalScope Scope(*this, S, OMPD_task);
6638   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6639 }
6640 
6641 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
6642     const OMPExecutableDirective &D) {
6643   if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
6644     EmitOMPScanDirective(*SD);
6645     return;
6646   }
6647   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
6648     return;
6649   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
6650     OMPPrivateScope GlobalsScope(CGF);
6651     if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
6652       // Capture global firstprivates to avoid crash.
6653       for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
6654         for (const Expr *Ref : C->varlists()) {
6655           const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
6656           if (!DRE)
6657             continue;
6658           const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
6659           if (!VD || VD->hasLocalStorage())
6660             continue;
6661           if (!CGF.LocalDeclMap.count(VD)) {
6662             LValue GlobLVal = CGF.EmitLValue(Ref);
6663             GlobalsScope.addPrivate(
6664                 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
6665           }
6666         }
6667       }
6668     }
6669     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
6670       (void)GlobalsScope.Privatize();
6671       ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
6672       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
6673     } else {
6674       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
6675         for (const Expr *E : LD->counters()) {
6676           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
6677           if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
6678             LValue GlobLVal = CGF.EmitLValue(E);
6679             GlobalsScope.addPrivate(
6680                 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
6681           }
6682           if (isa<OMPCapturedExprDecl>(VD)) {
6683             // Emit only those that were not explicitly referenced in clauses.
6684             if (!CGF.LocalDeclMap.count(VD))
6685               CGF.EmitVarDecl(*VD);
6686           }
6687         }
6688         for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
6689           if (!C->getNumForLoops())
6690             continue;
6691           for (unsigned I = LD->getCollapsedNumber(),
6692                         E = C->getLoopNumIterations().size();
6693                I < E; ++I) {
6694             if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
6695                     cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
6696               // Emit only those that were not explicitly referenced in clauses.
6697               if (!CGF.LocalDeclMap.count(VD))
6698                 CGF.EmitVarDecl(*VD);
6699             }
6700           }
6701         }
6702       }
6703       (void)GlobalsScope.Privatize();
6704       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
6705     }
6706   };
6707   if (D.getDirectiveKind() == OMPD_atomic ||
6708       D.getDirectiveKind() == OMPD_critical ||
6709       D.getDirectiveKind() == OMPD_section ||
6710       D.getDirectiveKind() == OMPD_master) {
6711     EmitStmt(D.getAssociatedStmt());
6712   } else {
6713     auto LPCRegion =
6714         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
6715     OMPSimdLexicalScope Scope(*this, D);
6716     CGM.getOpenMPRuntime().emitInlinedDirective(
6717         *this,
6718         isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
6719                                                     : D.getDirectiveKind(),
6720         CodeGen);
6721   }
6722   // Check for outer lastprivate conditional update.
6723   checkForLastprivateConditionalUpdate(*this, D);
6724 }
6725