1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/BinaryFormat/Dwarf.h"
29 #include "llvm/Frontend/OpenMP/OMPConstants.h"
30 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DebugInfoMetadata.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Metadata.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 using namespace clang;
38 using namespace CodeGen;
39 using namespace llvm::omp;
40 
41 static const VarDecl *getBaseDecl(const Expr *Ref);
42 
43 namespace {
44 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
45 /// for captured expressions.
46 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
47   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
48     for (const auto *C : S.clauses()) {
49       if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
50         if (const auto *PreInit =
51                 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
52           for (const auto *I : PreInit->decls()) {
53             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
54               CGF.EmitVarDecl(cast<VarDecl>(*I));
55             } else {
56               CodeGenFunction::AutoVarEmission Emission =
57                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
58               CGF.EmitAutoVarCleanups(Emission);
59             }
60           }
61         }
62       }
63     }
64   }
65   CodeGenFunction::OMPPrivateScope InlinedShareds;
66 
67   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
68     return CGF.LambdaCaptureFields.lookup(VD) ||
69            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
70            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
71             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
72   }
73 
74 public:
75   OMPLexicalScope(
76       CodeGenFunction &CGF, const OMPExecutableDirective &S,
77       const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
78       const bool EmitPreInitStmt = true)
79       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
80         InlinedShareds(CGF) {
81     if (EmitPreInitStmt)
82       emitPreInitStmt(CGF, S);
83     if (!CapturedRegion.hasValue())
84       return;
85     assert(S.hasAssociatedStmt() &&
86            "Expected associated statement for inlined directive.");
87     const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
88     for (const auto &C : CS->captures()) {
89       if (C.capturesVariable() || C.capturesVariableByCopy()) {
90         auto *VD = C.getCapturedVar();
91         assert(VD == VD->getCanonicalDecl() &&
92                "Canonical decl must be captured.");
93         DeclRefExpr DRE(
94             CGF.getContext(), const_cast<VarDecl *>(VD),
95             isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
96                                        InlinedShareds.isGlobalVarCaptured(VD)),
97             VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
98         InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
99           return CGF.EmitLValue(&DRE).getAddress(CGF);
100         });
101       }
102     }
103     (void)InlinedShareds.Privatize();
104   }
105 };
106 
107 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
108 /// for captured expressions.
109 class OMPParallelScope final : public OMPLexicalScope {
110   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
111     OpenMPDirectiveKind Kind = S.getDirectiveKind();
112     return !(isOpenMPTargetExecutionDirective(Kind) ||
113              isOpenMPLoopBoundSharingDirective(Kind)) &&
114            isOpenMPParallelDirective(Kind);
115   }
116 
117 public:
118   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
119       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
120                         EmitPreInitStmt(S)) {}
121 };
122 
123 /// Lexical scope for OpenMP teams construct, that handles correct codegen
124 /// for captured expressions.
125 class OMPTeamsScope final : public OMPLexicalScope {
126   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
127     OpenMPDirectiveKind Kind = S.getDirectiveKind();
128     return !isOpenMPTargetExecutionDirective(Kind) &&
129            isOpenMPTeamsDirective(Kind);
130   }
131 
132 public:
133   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
134       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
135                         EmitPreInitStmt(S)) {}
136 };
137 
138 /// Private scope for OpenMP loop-based directives, that supports capturing
139 /// of used expression from loop statement.
140 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
141   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
142     const DeclStmt *PreInits;
143     CodeGenFunction::OMPMapVars PreCondVars;
144     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
145       llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
146       for (const auto *E : LD->counters()) {
147         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
148         EmittedAsPrivate.insert(VD->getCanonicalDecl());
149         (void)PreCondVars.setVarAddr(
150             CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
151       }
152       // Mark private vars as undefs.
153       for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
154         for (const Expr *IRef : C->varlists()) {
155           const auto *OrigVD =
156               cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
157           if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
158             (void)PreCondVars.setVarAddr(
159                 CGF, OrigVD,
160                 Address::deprecated(
161                     llvm::UndefValue::get(
162                         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
163                             OrigVD->getType().getNonReferenceType()))),
164                     CGF.getContext().getDeclAlign(OrigVD)));
165           }
166         }
167       }
168       (void)PreCondVars.apply(CGF);
169       // Emit init, __range and __end variables for C++ range loops.
170       (void)OMPLoopBasedDirective::doForAllLoops(
171           LD->getInnermostCapturedStmt()->getCapturedStmt(),
172           /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
173           [&CGF](unsigned Cnt, const Stmt *CurStmt) {
174             if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
175               if (const Stmt *Init = CXXFor->getInit())
176                 CGF.EmitStmt(Init);
177               CGF.EmitStmt(CXXFor->getRangeStmt());
178               CGF.EmitStmt(CXXFor->getEndStmt());
179             }
180             return false;
181           });
182       PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
183     } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
184       PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
185     } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
186       PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
187     } else {
188       llvm_unreachable("Unknown loop-based directive kind.");
189     }
190     if (PreInits) {
191       for (const auto *I : PreInits->decls())
192         CGF.EmitVarDecl(cast<VarDecl>(*I));
193     }
194     PreCondVars.restore(CGF);
195   }
196 
197 public:
198   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
199       : CodeGenFunction::RunCleanupsScope(CGF) {
200     emitPreInitStmt(CGF, S);
201   }
202 };
203 
204 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
205   CodeGenFunction::OMPPrivateScope InlinedShareds;
206 
207   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
208     return CGF.LambdaCaptureFields.lookup(VD) ||
209            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
210            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
211             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
212   }
213 
214 public:
215   OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
216       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
217         InlinedShareds(CGF) {
218     for (const auto *C : S.clauses()) {
219       if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
220         if (const auto *PreInit =
221                 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
222           for (const auto *I : PreInit->decls()) {
223             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
224               CGF.EmitVarDecl(cast<VarDecl>(*I));
225             } else {
226               CodeGenFunction::AutoVarEmission Emission =
227                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
228               CGF.EmitAutoVarCleanups(Emission);
229             }
230           }
231         }
232       } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
233         for (const Expr *E : UDP->varlists()) {
234           const Decl *D = cast<DeclRefExpr>(E)->getDecl();
235           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
236             CGF.EmitVarDecl(*OED);
237         }
238       } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
239         for (const Expr *E : UDP->varlists()) {
240           const Decl *D = getBaseDecl(E);
241           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
242             CGF.EmitVarDecl(*OED);
243         }
244       }
245     }
246     if (!isOpenMPSimdDirective(S.getDirectiveKind()))
247       CGF.EmitOMPPrivateClause(S, InlinedShareds);
248     if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
249       if (const Expr *E = TG->getReductionRef())
250         CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
251     }
252     // Temp copy arrays for inscan reductions should not be emitted as they are
253     // not used in simd only mode.
254     llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
255     for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
256       if (C->getModifier() != OMPC_REDUCTION_inscan)
257         continue;
258       for (const Expr *E : C->copy_array_temps())
259         CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
260     }
261     const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
262     while (CS) {
263       for (auto &C : CS->captures()) {
264         if (C.capturesVariable() || C.capturesVariableByCopy()) {
265           auto *VD = C.getCapturedVar();
266           if (CopyArrayTemps.contains(VD))
267             continue;
268           assert(VD == VD->getCanonicalDecl() &&
269                  "Canonical decl must be captured.");
270           DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
271                           isCapturedVar(CGF, VD) ||
272                               (CGF.CapturedStmtInfo &&
273                                InlinedShareds.isGlobalVarCaptured(VD)),
274                           VD->getType().getNonReferenceType(), VK_LValue,
275                           C.getLocation());
276           InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
277             return CGF.EmitLValue(&DRE).getAddress(CGF);
278           });
279         }
280       }
281       CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
282     }
283     (void)InlinedShareds.Privatize();
284   }
285 };
286 
287 } // namespace
288 
289 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
290                                          const OMPExecutableDirective &S,
291                                          const RegionCodeGenTy &CodeGen);
292 
293 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
294   if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
295     if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
296       OrigVD = OrigVD->getCanonicalDecl();
297       bool IsCaptured =
298           LambdaCaptureFields.lookup(OrigVD) ||
299           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
300           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
301       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
302                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
303       return EmitLValue(&DRE);
304     }
305   }
306   return EmitLValue(E);
307 }
308 
309 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
310   ASTContext &C = getContext();
311   llvm::Value *Size = nullptr;
312   auto SizeInChars = C.getTypeSizeInChars(Ty);
313   if (SizeInChars.isZero()) {
314     // getTypeSizeInChars() returns 0 for a VLA.
315     while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
316       VlaSizePair VlaSize = getVLASize(VAT);
317       Ty = VlaSize.Type;
318       Size =
319           Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
320     }
321     SizeInChars = C.getTypeSizeInChars(Ty);
322     if (SizeInChars.isZero())
323       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
324     return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
325   }
326   return CGM.getSize(SizeInChars);
327 }
328 
329 void CodeGenFunction::GenerateOpenMPCapturedVars(
330     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
331   const RecordDecl *RD = S.getCapturedRecordDecl();
332   auto CurField = RD->field_begin();
333   auto CurCap = S.captures().begin();
334   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
335                                                  E = S.capture_init_end();
336        I != E; ++I, ++CurField, ++CurCap) {
337     if (CurField->hasCapturedVLAType()) {
338       const VariableArrayType *VAT = CurField->getCapturedVLAType();
339       llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
340       CapturedVars.push_back(Val);
341     } else if (CurCap->capturesThis()) {
342       CapturedVars.push_back(CXXThisValue);
343     } else if (CurCap->capturesVariableByCopy()) {
344       llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
345 
346       // If the field is not a pointer, we need to save the actual value
347       // and load it as a void pointer.
348       if (!CurField->getType()->isAnyPointerType()) {
349         ASTContext &Ctx = getContext();
350         Address DstAddr = CreateMemTemp(
351             Ctx.getUIntPtrType(),
352             Twine(CurCap->getCapturedVar()->getName(), ".casted"));
353         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
354 
355         llvm::Value *SrcAddrVal = EmitScalarConversion(
356             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
357             Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
358         LValue SrcLV =
359             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
360 
361         // Store the value using the source type pointer.
362         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
363 
364         // Load the value using the destination type pointer.
365         CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
366       }
367       CapturedVars.push_back(CV);
368     } else {
369       assert(CurCap->capturesVariable() && "Expected capture by reference.");
370       CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
371     }
372   }
373 }
374 
375 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
376                                     QualType DstType, StringRef Name,
377                                     LValue AddrLV) {
378   ASTContext &Ctx = CGF.getContext();
379 
380   llvm::Value *CastedPtr = CGF.EmitScalarConversion(
381       AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
382       Ctx.getPointerType(DstType), Loc);
383   Address TmpAddr =
384       CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
385   return TmpAddr;
386 }
387 
388 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
389   if (T->isLValueReferenceType())
390     return C.getLValueReferenceType(
391         getCanonicalParamType(C, T.getNonReferenceType()),
392         /*SpelledAsLValue=*/false);
393   if (T->isPointerType())
394     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
395   if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
396     if (const auto *VLA = dyn_cast<VariableArrayType>(A))
397       return getCanonicalParamType(C, VLA->getElementType());
398     if (!A->isVariablyModifiedType())
399       return C.getCanonicalType(T);
400   }
401   return C.getCanonicalParamType(T);
402 }
403 
404 namespace {
405 /// Contains required data for proper outlined function codegen.
406 struct FunctionOptions {
407   /// Captured statement for which the function is generated.
408   const CapturedStmt *S = nullptr;
409   /// true if cast to/from  UIntPtr is required for variables captured by
410   /// value.
411   const bool UIntPtrCastRequired = true;
412   /// true if only casted arguments must be registered as local args or VLA
413   /// sizes.
414   const bool RegisterCastedArgsOnly = false;
415   /// Name of the generated function.
416   const StringRef FunctionName;
417   /// Location of the non-debug version of the outlined function.
418   SourceLocation Loc;
419   explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
420                            bool RegisterCastedArgsOnly, StringRef FunctionName,
421                            SourceLocation Loc)
422       : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
423         RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
424         FunctionName(FunctionName), Loc(Loc) {}
425 };
426 } // namespace
427 
428 static llvm::Function *emitOutlinedFunctionPrologue(
429     CodeGenFunction &CGF, FunctionArgList &Args,
430     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
431         &LocalAddrs,
432     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
433         &VLASizes,
434     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
435   const CapturedDecl *CD = FO.S->getCapturedDecl();
436   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
437   assert(CD->hasBody() && "missing CapturedDecl body");
438 
439   CXXThisValue = nullptr;
440   // Build the argument list.
441   CodeGenModule &CGM = CGF.CGM;
442   ASTContext &Ctx = CGM.getContext();
443   FunctionArgList TargetArgs;
444   Args.append(CD->param_begin(),
445               std::next(CD->param_begin(), CD->getContextParamPosition()));
446   TargetArgs.append(
447       CD->param_begin(),
448       std::next(CD->param_begin(), CD->getContextParamPosition()));
449   auto I = FO.S->captures().begin();
450   FunctionDecl *DebugFunctionDecl = nullptr;
451   if (!FO.UIntPtrCastRequired) {
452     FunctionProtoType::ExtProtoInfo EPI;
453     QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
454     DebugFunctionDecl = FunctionDecl::Create(
455         Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
456         SourceLocation(), DeclarationName(), FunctionTy,
457         Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
458         /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
459         /*hasWrittenPrototype=*/false);
460   }
461   for (const FieldDecl *FD : RD->fields()) {
462     QualType ArgType = FD->getType();
463     IdentifierInfo *II = nullptr;
464     VarDecl *CapVar = nullptr;
465 
466     // If this is a capture by copy and the type is not a pointer, the outlined
467     // function argument type should be uintptr and the value properly casted to
468     // uintptr. This is necessary given that the runtime library is only able to
469     // deal with pointers. We can pass in the same way the VLA type sizes to the
470     // outlined function.
471     if (FO.UIntPtrCastRequired &&
472         ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
473          I->capturesVariableArrayType()))
474       ArgType = Ctx.getUIntPtrType();
475 
476     if (I->capturesVariable() || I->capturesVariableByCopy()) {
477       CapVar = I->getCapturedVar();
478       II = CapVar->getIdentifier();
479     } else if (I->capturesThis()) {
480       II = &Ctx.Idents.get("this");
481     } else {
482       assert(I->capturesVariableArrayType());
483       II = &Ctx.Idents.get("vla");
484     }
485     if (ArgType->isVariablyModifiedType())
486       ArgType = getCanonicalParamType(Ctx, ArgType);
487     VarDecl *Arg;
488     if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
489       Arg = ParmVarDecl::Create(
490           Ctx, DebugFunctionDecl,
491           CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
492           CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
493           /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
494     } else {
495       Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
496                                       II, ArgType, ImplicitParamDecl::Other);
497     }
498     Args.emplace_back(Arg);
499     // Do not cast arguments if we emit function with non-original types.
500     TargetArgs.emplace_back(
501         FO.UIntPtrCastRequired
502             ? Arg
503             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
504     ++I;
505   }
506   Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
507               CD->param_end());
508   TargetArgs.append(
509       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
510       CD->param_end());
511 
512   // Create the function declaration.
513   const CGFunctionInfo &FuncInfo =
514       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
515   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
516 
517   auto *F =
518       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
519                              FO.FunctionName, &CGM.getModule());
520   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
521   if (CD->isNothrow())
522     F->setDoesNotThrow();
523   F->setDoesNotRecurse();
524 
525   // Always inline the outlined function if optimizations are enabled.
526   if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
527     F->removeFnAttr(llvm::Attribute::NoInline);
528     F->addFnAttr(llvm::Attribute::AlwaysInline);
529   }
530 
531   // Generate the function.
532   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
533                     FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
534                     FO.UIntPtrCastRequired ? FO.Loc
535                                            : CD->getBody()->getBeginLoc());
536   unsigned Cnt = CD->getContextParamPosition();
537   I = FO.S->captures().begin();
538   for (const FieldDecl *FD : RD->fields()) {
539     // Do not map arguments if we emit function with non-original types.
540     Address LocalAddr(Address::invalid());
541     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
542       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
543                                                              TargetArgs[Cnt]);
544     } else {
545       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
546     }
547     // If we are capturing a pointer by copy we don't need to do anything, just
548     // use the value that we get from the arguments.
549     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
550       const VarDecl *CurVD = I->getCapturedVar();
551       if (!FO.RegisterCastedArgsOnly)
552         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
553       ++Cnt;
554       ++I;
555       continue;
556     }
557 
558     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
559                                         AlignmentSource::Decl);
560     if (FD->hasCapturedVLAType()) {
561       if (FO.UIntPtrCastRequired) {
562         ArgLVal = CGF.MakeAddrLValue(
563             castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
564                                  Args[Cnt]->getName(), ArgLVal),
565             FD->getType(), AlignmentSource::Decl);
566       }
567       llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
568       const VariableArrayType *VAT = FD->getCapturedVLAType();
569       VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
570     } else if (I->capturesVariable()) {
571       const VarDecl *Var = I->getCapturedVar();
572       QualType VarTy = Var->getType();
573       Address ArgAddr = ArgLVal.getAddress(CGF);
574       if (ArgLVal.getType()->isLValueReferenceType()) {
575         ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
576       } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
577         assert(ArgLVal.getType()->isPointerType());
578         ArgAddr = CGF.EmitLoadOfPointer(
579             ArgAddr, ArgLVal.getType()->castAs<PointerType>());
580       }
581       if (!FO.RegisterCastedArgsOnly) {
582         LocalAddrs.insert(
583             {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
584       }
585     } else if (I->capturesVariableByCopy()) {
586       assert(!FD->getType()->isAnyPointerType() &&
587              "Not expecting a captured pointer.");
588       const VarDecl *Var = I->getCapturedVar();
589       LocalAddrs.insert({Args[Cnt],
590                          {Var, FO.UIntPtrCastRequired
591                                    ? castValueFromUintptr(
592                                          CGF, I->getLocation(), FD->getType(),
593                                          Args[Cnt]->getName(), ArgLVal)
594                                    : ArgLVal.getAddress(CGF)}});
595     } else {
596       // If 'this' is captured, load it into CXXThisValue.
597       assert(I->capturesThis());
598       CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
599       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
600     }
601     ++Cnt;
602     ++I;
603   }
604 
605   return F;
606 }
607 
608 llvm::Function *
609 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
610                                                     SourceLocation Loc) {
611   assert(
612       CapturedStmtInfo &&
613       "CapturedStmtInfo should be set when generating the captured function");
614   const CapturedDecl *CD = S.getCapturedDecl();
615   // Build the argument list.
616   bool NeedWrapperFunction =
617       getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
618   FunctionArgList Args;
619   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
620   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
621   SmallString<256> Buffer;
622   llvm::raw_svector_ostream Out(Buffer);
623   Out << CapturedStmtInfo->getHelperName();
624   if (NeedWrapperFunction)
625     Out << "_debug__";
626   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
627                      Out.str(), Loc);
628   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
629                                                    VLASizes, CXXThisValue, FO);
630   CodeGenFunction::OMPPrivateScope LocalScope(*this);
631   for (const auto &LocalAddrPair : LocalAddrs) {
632     if (LocalAddrPair.second.first) {
633       LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
634         return LocalAddrPair.second.second;
635       });
636     }
637   }
638   (void)LocalScope.Privatize();
639   for (const auto &VLASizePair : VLASizes)
640     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
641   PGO.assignRegionCounters(GlobalDecl(CD), F);
642   CapturedStmtInfo->EmitBody(*this, CD->getBody());
643   (void)LocalScope.ForceCleanup();
644   FinishFunction(CD->getBodyRBrace());
645   if (!NeedWrapperFunction)
646     return F;
647 
648   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
649                             /*RegisterCastedArgsOnly=*/true,
650                             CapturedStmtInfo->getHelperName(), Loc);
651   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
652   WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
653   Args.clear();
654   LocalAddrs.clear();
655   VLASizes.clear();
656   llvm::Function *WrapperF =
657       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
658                                    WrapperCGF.CXXThisValue, WrapperFO);
659   llvm::SmallVector<llvm::Value *, 4> CallArgs;
660   auto *PI = F->arg_begin();
661   for (const auto *Arg : Args) {
662     llvm::Value *CallArg;
663     auto I = LocalAddrs.find(Arg);
664     if (I != LocalAddrs.end()) {
665       LValue LV = WrapperCGF.MakeAddrLValue(
666           I->second.second,
667           I->second.first ? I->second.first->getType() : Arg->getType(),
668           AlignmentSource::Decl);
669       if (LV.getType()->isAnyComplexType())
670         LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
671             LV.getAddress(WrapperCGF),
672             PI->getType()->getPointerTo(
673                 LV.getAddress(WrapperCGF).getAddressSpace()),
674             PI->getType()));
675       CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
676     } else {
677       auto EI = VLASizes.find(Arg);
678       if (EI != VLASizes.end()) {
679         CallArg = EI->second.second;
680       } else {
681         LValue LV =
682             WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
683                                       Arg->getType(), AlignmentSource::Decl);
684         CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
685       }
686     }
687     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
688     ++PI;
689   }
690   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
691   WrapperCGF.FinishFunction();
692   return WrapperF;
693 }
694 
695 //===----------------------------------------------------------------------===//
696 //                              OpenMP Directive Emission
697 //===----------------------------------------------------------------------===//
698 void CodeGenFunction::EmitOMPAggregateAssign(
699     Address DestAddr, Address SrcAddr, QualType OriginalType,
700     const llvm::function_ref<void(Address, Address)> CopyGen) {
701   // Perform element-by-element initialization.
702   QualType ElementTy;
703 
704   // Drill down to the base element type on both arrays.
705   const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
706   llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
707   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
708 
709   llvm::Value *SrcBegin = SrcAddr.getPointer();
710   llvm::Value *DestBegin = DestAddr.getPointer();
711   // Cast from pointer to array type to pointer to single element.
712   llvm::Value *DestEnd =
713       Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
714   // The basic structure here is a while-do loop.
715   llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
716   llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
717   llvm::Value *IsEmpty =
718       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
719   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
720 
721   // Enter the loop body, making that address the current address.
722   llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
723   EmitBlock(BodyBB);
724 
725   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
726 
727   llvm::PHINode *SrcElementPHI =
728       Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
729   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
730   Address SrcElementCurrent =
731       Address(SrcElementPHI, SrcAddr.getElementType(),
732               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
733 
734   llvm::PHINode *DestElementPHI = Builder.CreatePHI(
735       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
736   DestElementPHI->addIncoming(DestBegin, EntryBB);
737   Address DestElementCurrent =
738       Address(DestElementPHI, DestAddr.getElementType(),
739               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
740 
741   // Emit copy.
742   CopyGen(DestElementCurrent, SrcElementCurrent);
743 
744   // Shift the address forward by one element.
745   llvm::Value *DestElementNext =
746       Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
747                                  /*Idx0=*/1, "omp.arraycpy.dest.element");
748   llvm::Value *SrcElementNext =
749       Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
750                                  /*Idx0=*/1, "omp.arraycpy.src.element");
751   // Check whether we've reached the end.
752   llvm::Value *Done =
753       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
754   Builder.CreateCondBr(Done, DoneBB, BodyBB);
755   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
756   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
757 
758   // Done.
759   EmitBlock(DoneBB, /*IsFinished=*/true);
760 }
761 
762 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
763                                   Address SrcAddr, const VarDecl *DestVD,
764                                   const VarDecl *SrcVD, const Expr *Copy) {
765   if (OriginalType->isArrayType()) {
766     const auto *BO = dyn_cast<BinaryOperator>(Copy);
767     if (BO && BO->getOpcode() == BO_Assign) {
768       // Perform simple memcpy for simple copying.
769       LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
770       LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
771       EmitAggregateAssign(Dest, Src, OriginalType);
772     } else {
773       // For arrays with complex element types perform element by element
774       // copying.
775       EmitOMPAggregateAssign(
776           DestAddr, SrcAddr, OriginalType,
777           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
778             // Working with the single array element, so have to remap
779             // destination and source variables to corresponding array
780             // elements.
781             CodeGenFunction::OMPPrivateScope Remap(*this);
782             Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
783             Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
784             (void)Remap.Privatize();
785             EmitIgnoredExpr(Copy);
786           });
787     }
788   } else {
789     // Remap pseudo source variable to private copy.
790     CodeGenFunction::OMPPrivateScope Remap(*this);
791     Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
792     Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
793     (void)Remap.Privatize();
794     // Emit copying of the whole variable.
795     EmitIgnoredExpr(Copy);
796   }
797 }
798 
799 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
800                                                 OMPPrivateScope &PrivateScope) {
801   if (!HaveInsertPoint())
802     return false;
803   bool DeviceConstTarget =
804       getLangOpts().OpenMPIsDevice &&
805       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
806   bool FirstprivateIsLastprivate = false;
807   llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
808   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
809     for (const auto *D : C->varlists())
810       Lastprivates.try_emplace(
811           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
812           C->getKind());
813   }
814   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
815   llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
816   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
817   // Force emission of the firstprivate copy if the directive does not emit
818   // outlined function, like omp for, omp simd, omp distribute etc.
819   bool MustEmitFirstprivateCopy =
820       CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
821   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
822     const auto *IRef = C->varlist_begin();
823     const auto *InitsRef = C->inits().begin();
824     for (const Expr *IInit : C->private_copies()) {
825       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
826       bool ThisFirstprivateIsLastprivate =
827           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
828       const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
829       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
830       if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
831           !FD->getType()->isReferenceType() &&
832           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
833         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
834         ++IRef;
835         ++InitsRef;
836         continue;
837       }
838       // Do not emit copy for firstprivate constant variables in target regions,
839       // captured by reference.
840       if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
841           FD && FD->getType()->isReferenceType() &&
842           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
843         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
844         ++IRef;
845         ++InitsRef;
846         continue;
847       }
848       FirstprivateIsLastprivate =
849           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
850       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
851         const auto *VDInit =
852             cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
853         bool IsRegistered;
854         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
855                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
856                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
857         LValue OriginalLVal;
858         if (!FD) {
859           // Check if the firstprivate variable is just a constant value.
860           ConstantEmission CE = tryEmitAsConstant(&DRE);
861           if (CE && !CE.isReference()) {
862             // Constant value, no need to create a copy.
863             ++IRef;
864             ++InitsRef;
865             continue;
866           }
867           if (CE && CE.isReference()) {
868             OriginalLVal = CE.getReferenceLValue(*this, &DRE);
869           } else {
870             assert(!CE && "Expected non-constant firstprivate.");
871             OriginalLVal = EmitLValue(&DRE);
872           }
873         } else {
874           OriginalLVal = EmitLValue(&DRE);
875         }
876         QualType Type = VD->getType();
877         if (Type->isArrayType()) {
878           // Emit VarDecl with copy init for arrays.
879           // Get the address of the original variable captured in current
880           // captured region.
881           IsRegistered = PrivateScope.addPrivate(
882               OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
883                 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
884                 const Expr *Init = VD->getInit();
885                 if (!isa<CXXConstructExpr>(Init) ||
886                     isTrivialInitializer(Init)) {
887                   // Perform simple memcpy.
888                   LValue Dest =
889                       MakeAddrLValue(Emission.getAllocatedAddress(), Type);
890                   EmitAggregateAssign(Dest, OriginalLVal, Type);
891                 } else {
892                   EmitOMPAggregateAssign(
893                       Emission.getAllocatedAddress(),
894                       OriginalLVal.getAddress(*this), Type,
895                       [this, VDInit, Init](Address DestElement,
896                                            Address SrcElement) {
897                         // Clean up any temporaries needed by the
898                         // initialization.
899                         RunCleanupsScope InitScope(*this);
900                         // Emit initialization for single element.
901                         setAddrOfLocalVar(VDInit, SrcElement);
902                         EmitAnyExprToMem(Init, DestElement,
903                                          Init->getType().getQualifiers(),
904                                          /*IsInitializer*/ false);
905                         LocalDeclMap.erase(VDInit);
906                       });
907                 }
908                 EmitAutoVarCleanups(Emission);
909                 return Emission.getAllocatedAddress();
910               });
911         } else {
912           Address OriginalAddr = OriginalLVal.getAddress(*this);
913           IsRegistered =
914               PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
915                                                ThisFirstprivateIsLastprivate,
916                                                OrigVD, &Lastprivates, IRef]() {
917                 // Emit private VarDecl with copy init.
918                 // Remap temp VDInit variable to the address of the original
919                 // variable (for proper handling of captured global variables).
920                 setAddrOfLocalVar(VDInit, OriginalAddr);
921                 EmitDecl(*VD);
922                 LocalDeclMap.erase(VDInit);
923                 if (ThisFirstprivateIsLastprivate &&
924                     Lastprivates[OrigVD->getCanonicalDecl()] ==
925                         OMPC_LASTPRIVATE_conditional) {
926                   // Create/init special variable for lastprivate conditionals.
927                   Address VDAddr =
928                       CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
929                           *this, OrigVD);
930                   llvm::Value *V = EmitLoadOfScalar(
931                       MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
932                                      AlignmentSource::Decl),
933                       (*IRef)->getExprLoc());
934                   EmitStoreOfScalar(V,
935                                     MakeAddrLValue(VDAddr, (*IRef)->getType(),
936                                                    AlignmentSource::Decl));
937                   LocalDeclMap.erase(VD);
938                   setAddrOfLocalVar(VD, VDAddr);
939                   return VDAddr;
940                 }
941                 return GetAddrOfLocalVar(VD);
942               });
943         }
944         assert(IsRegistered &&
945                "firstprivate var already registered as private");
946         // Silence the warning about unused variable.
947         (void)IsRegistered;
948       }
949       ++IRef;
950       ++InitsRef;
951     }
952   }
953   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
954 }
955 
956 void CodeGenFunction::EmitOMPPrivateClause(
957     const OMPExecutableDirective &D,
958     CodeGenFunction::OMPPrivateScope &PrivateScope) {
959   if (!HaveInsertPoint())
960     return;
961   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
962   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
963     auto IRef = C->varlist_begin();
964     for (const Expr *IInit : C->private_copies()) {
965       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
966       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
967         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
968         bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
969           // Emit private VarDecl with copy init.
970           EmitDecl(*VD);
971           return GetAddrOfLocalVar(VD);
972         });
973         assert(IsRegistered && "private var already registered as private");
974         // Silence the warning about unused variable.
975         (void)IsRegistered;
976       }
977       ++IRef;
978     }
979   }
980 }
981 
982 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
983   if (!HaveInsertPoint())
984     return false;
985   // threadprivate_var1 = master_threadprivate_var1;
986   // operator=(threadprivate_var2, master_threadprivate_var2);
987   // ...
988   // __kmpc_barrier(&loc, global_tid);
989   llvm::DenseSet<const VarDecl *> CopiedVars;
990   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
991   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
992     auto IRef = C->varlist_begin();
993     auto ISrcRef = C->source_exprs().begin();
994     auto IDestRef = C->destination_exprs().begin();
995     for (const Expr *AssignOp : C->assignment_ops()) {
996       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
997       QualType Type = VD->getType();
998       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
999         // Get the address of the master variable. If we are emitting code with
1000         // TLS support, the address is passed from the master as field in the
1001         // captured declaration.
1002         Address MasterAddr = Address::invalid();
1003         if (getLangOpts().OpenMPUseTLS &&
1004             getContext().getTargetInfo().isTLSSupported()) {
1005           assert(CapturedStmtInfo->lookup(VD) &&
1006                  "Copyin threadprivates should have been captured!");
1007           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
1008                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1009           MasterAddr = EmitLValue(&DRE).getAddress(*this);
1010           LocalDeclMap.erase(VD);
1011         } else {
1012           MasterAddr = Address::deprecated(
1013               VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
1014                                   : CGM.GetAddrOfGlobal(VD),
1015               getContext().getDeclAlign(VD));
1016         }
1017         // Get the address of the threadprivate variable.
1018         Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
1019         if (CopiedVars.size() == 1) {
1020           // At first check if current thread is a master thread. If it is, no
1021           // need to copy data.
1022           CopyBegin = createBasicBlock("copyin.not.master");
1023           CopyEnd = createBasicBlock("copyin.not.master.end");
1024           // TODO: Avoid ptrtoint conversion.
1025           auto *MasterAddrInt =
1026               Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
1027           auto *PrivateAddrInt =
1028               Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
1029           Builder.CreateCondBr(
1030               Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
1031               CopyEnd);
1032           EmitBlock(CopyBegin);
1033         }
1034         const auto *SrcVD =
1035             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1036         const auto *DestVD =
1037             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1038         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1039       }
1040       ++IRef;
1041       ++ISrcRef;
1042       ++IDestRef;
1043     }
1044   }
1045   if (CopyEnd) {
1046     // Exit out of copying procedure for non-master thread.
1047     EmitBlock(CopyEnd, /*IsFinished=*/true);
1048     return true;
1049   }
1050   return false;
1051 }
1052 
1053 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1054     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1055   if (!HaveInsertPoint())
1056     return false;
1057   bool HasAtLeastOneLastprivate = false;
1058   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1059   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1060     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1061     for (const Expr *C : LoopDirective->counters()) {
1062       SIMDLCVs.insert(
1063           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1064     }
1065   }
1066   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1067   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1068     HasAtLeastOneLastprivate = true;
1069     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1070         !getLangOpts().OpenMPSimd)
1071       break;
1072     const auto *IRef = C->varlist_begin();
1073     const auto *IDestRef = C->destination_exprs().begin();
1074     for (const Expr *IInit : C->private_copies()) {
1075       // Keep the address of the original variable for future update at the end
1076       // of the loop.
1077       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1078       // Taskloops do not require additional initialization, it is done in
1079       // runtime support library.
1080       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1081         const auto *DestVD =
1082             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1083         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
1084           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1085                           /*RefersToEnclosingVariableOrCapture=*/
1086                           CapturedStmtInfo->lookup(OrigVD) != nullptr,
1087                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1088           return EmitLValue(&DRE).getAddress(*this);
1089         });
1090         // Check if the variable is also a firstprivate: in this case IInit is
1091         // not generated. Initialization of this variable will happen in codegen
1092         // for 'firstprivate' clause.
1093         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1094           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1095           bool IsRegistered =
1096               PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() {
1097                 if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1098                   Address VDAddr =
1099                       CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1100                           *this, OrigVD);
1101                   setAddrOfLocalVar(VD, VDAddr);
1102                   return VDAddr;
1103                 }
1104                 // Emit private VarDecl with copy init.
1105                 EmitDecl(*VD);
1106                 return GetAddrOfLocalVar(VD);
1107               });
1108           assert(IsRegistered &&
1109                  "lastprivate var already registered as private");
1110           (void)IsRegistered;
1111         }
1112       }
1113       ++IRef;
1114       ++IDestRef;
1115     }
1116   }
1117   return HasAtLeastOneLastprivate;
1118 }
1119 
1120 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1121     const OMPExecutableDirective &D, bool NoFinals,
1122     llvm::Value *IsLastIterCond) {
1123   if (!HaveInsertPoint())
1124     return;
1125   // Emit following code:
1126   // if (<IsLastIterCond>) {
1127   //   orig_var1 = private_orig_var1;
1128   //   ...
1129   //   orig_varn = private_orig_varn;
1130   // }
1131   llvm::BasicBlock *ThenBB = nullptr;
1132   llvm::BasicBlock *DoneBB = nullptr;
1133   if (IsLastIterCond) {
1134     // Emit implicit barrier if at least one lastprivate conditional is found
1135     // and this is not a simd mode.
1136     if (!getLangOpts().OpenMPSimd &&
1137         llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1138                      [](const OMPLastprivateClause *C) {
1139                        return C->getKind() == OMPC_LASTPRIVATE_conditional;
1140                      })) {
1141       CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1142                                              OMPD_unknown,
1143                                              /*EmitChecks=*/false,
1144                                              /*ForceSimpleCall=*/true);
1145     }
1146     ThenBB = createBasicBlock(".omp.lastprivate.then");
1147     DoneBB = createBasicBlock(".omp.lastprivate.done");
1148     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1149     EmitBlock(ThenBB);
1150   }
1151   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1152   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1153   if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1154     auto IC = LoopDirective->counters().begin();
1155     for (const Expr *F : LoopDirective->finals()) {
1156       const auto *D =
1157           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1158       if (NoFinals)
1159         AlreadyEmittedVars.insert(D);
1160       else
1161         LoopCountersAndUpdates[D] = F;
1162       ++IC;
1163     }
1164   }
1165   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1166     auto IRef = C->varlist_begin();
1167     auto ISrcRef = C->source_exprs().begin();
1168     auto IDestRef = C->destination_exprs().begin();
1169     for (const Expr *AssignOp : C->assignment_ops()) {
1170       const auto *PrivateVD =
1171           cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1172       QualType Type = PrivateVD->getType();
1173       const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1174       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1175         // If lastprivate variable is a loop control variable for loop-based
1176         // directive, update its value before copyin back to original
1177         // variable.
1178         if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1179           EmitIgnoredExpr(FinalExpr);
1180         const auto *SrcVD =
1181             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1182         const auto *DestVD =
1183             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1184         // Get the address of the private variable.
1185         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1186         if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1187           PrivateAddr = Address::deprecated(
1188               Builder.CreateLoad(PrivateAddr),
1189               CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1190         // Store the last value to the private copy in the last iteration.
1191         if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1192           CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1193               *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1194               (*IRef)->getExprLoc());
1195         // Get the address of the original variable.
1196         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1197         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1198       }
1199       ++IRef;
1200       ++ISrcRef;
1201       ++IDestRef;
1202     }
1203     if (const Expr *PostUpdate = C->getPostUpdateExpr())
1204       EmitIgnoredExpr(PostUpdate);
1205   }
1206   if (IsLastIterCond)
1207     EmitBlock(DoneBB, /*IsFinished=*/true);
1208 }
1209 
1210 void CodeGenFunction::EmitOMPReductionClauseInit(
1211     const OMPExecutableDirective &D,
1212     CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1213   if (!HaveInsertPoint())
1214     return;
1215   SmallVector<const Expr *, 4> Shareds;
1216   SmallVector<const Expr *, 4> Privates;
1217   SmallVector<const Expr *, 4> ReductionOps;
1218   SmallVector<const Expr *, 4> LHSs;
1219   SmallVector<const Expr *, 4> RHSs;
1220   OMPTaskDataTy Data;
1221   SmallVector<const Expr *, 4> TaskLHSs;
1222   SmallVector<const Expr *, 4> TaskRHSs;
1223   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1224     if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1225       continue;
1226     Shareds.append(C->varlist_begin(), C->varlist_end());
1227     Privates.append(C->privates().begin(), C->privates().end());
1228     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1229     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1230     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1231     if (C->getModifier() == OMPC_REDUCTION_task) {
1232       Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1233       Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1234       Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1235       Data.ReductionOps.append(C->reduction_ops().begin(),
1236                                C->reduction_ops().end());
1237       TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1238       TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1239     }
1240   }
1241   ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1242   unsigned Count = 0;
1243   auto *ILHS = LHSs.begin();
1244   auto *IRHS = RHSs.begin();
1245   auto *IPriv = Privates.begin();
1246   for (const Expr *IRef : Shareds) {
1247     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1248     // Emit private VarDecl with reduction init.
1249     RedCG.emitSharedOrigLValue(*this, Count);
1250     RedCG.emitAggregateType(*this, Count);
1251     AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1252     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1253                              RedCG.getSharedLValue(Count).getAddress(*this),
1254                              [&Emission](CodeGenFunction &CGF) {
1255                                CGF.EmitAutoVarInit(Emission);
1256                                return true;
1257                              });
1258     EmitAutoVarCleanups(Emission);
1259     Address BaseAddr = RedCG.adjustPrivateAddress(
1260         *this, Count, Emission.getAllocatedAddress());
1261     bool IsRegistered = PrivateScope.addPrivate(
1262         RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
1263     assert(IsRegistered && "private var already registered as private");
1264     // Silence the warning about unused variable.
1265     (void)IsRegistered;
1266 
1267     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1268     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1269     QualType Type = PrivateVD->getType();
1270     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1271     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1272       // Store the address of the original variable associated with the LHS
1273       // implicit variable.
1274       PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1275         return RedCG.getSharedLValue(Count).getAddress(*this);
1276       });
1277       PrivateScope.addPrivate(
1278           RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
1279     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1280                isa<ArraySubscriptExpr>(IRef)) {
1281       // Store the address of the original variable associated with the LHS
1282       // implicit variable.
1283       PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1284         return RedCG.getSharedLValue(Count).getAddress(*this);
1285       });
1286       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
1287         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1288                                             ConvertTypeForMem(RHSVD->getType()),
1289                                             "rhs.begin");
1290       });
1291     } else {
1292       QualType Type = PrivateVD->getType();
1293       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1294       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1295       // Store the address of the original variable associated with the LHS
1296       // implicit variable.
1297       if (IsArray) {
1298         OriginalAddr = Builder.CreateElementBitCast(
1299             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1300       }
1301       PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
1302       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
1303         return IsArray ? Builder.CreateElementBitCast(
1304                              GetAddrOfLocalVar(PrivateVD),
1305                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1306                        : GetAddrOfLocalVar(PrivateVD);
1307       });
1308     }
1309     ++ILHS;
1310     ++IRHS;
1311     ++IPriv;
1312     ++Count;
1313   }
1314   if (!Data.ReductionVars.empty()) {
1315     Data.IsReductionWithTaskMod = true;
1316     Data.IsWorksharingReduction =
1317         isOpenMPWorksharingDirective(D.getDirectiveKind());
1318     llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1319         *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1320     const Expr *TaskRedRef = nullptr;
1321     switch (D.getDirectiveKind()) {
1322     case OMPD_parallel:
1323       TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1324       break;
1325     case OMPD_for:
1326       TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1327       break;
1328     case OMPD_sections:
1329       TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1330       break;
1331     case OMPD_parallel_for:
1332       TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1333       break;
1334     case OMPD_parallel_master:
1335       TaskRedRef =
1336           cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1337       break;
1338     case OMPD_parallel_sections:
1339       TaskRedRef =
1340           cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1341       break;
1342     case OMPD_target_parallel:
1343       TaskRedRef =
1344           cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1345       break;
1346     case OMPD_target_parallel_for:
1347       TaskRedRef =
1348           cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1349       break;
1350     case OMPD_distribute_parallel_for:
1351       TaskRedRef =
1352           cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1353       break;
1354     case OMPD_teams_distribute_parallel_for:
1355       TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1356                        .getTaskReductionRefExpr();
1357       break;
1358     case OMPD_target_teams_distribute_parallel_for:
1359       TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1360                        .getTaskReductionRefExpr();
1361       break;
1362     case OMPD_simd:
1363     case OMPD_for_simd:
1364     case OMPD_section:
1365     case OMPD_single:
1366     case OMPD_master:
1367     case OMPD_critical:
1368     case OMPD_parallel_for_simd:
1369     case OMPD_task:
1370     case OMPD_taskyield:
1371     case OMPD_barrier:
1372     case OMPD_taskwait:
1373     case OMPD_taskgroup:
1374     case OMPD_flush:
1375     case OMPD_depobj:
1376     case OMPD_scan:
1377     case OMPD_ordered:
1378     case OMPD_atomic:
1379     case OMPD_teams:
1380     case OMPD_target:
1381     case OMPD_cancellation_point:
1382     case OMPD_cancel:
1383     case OMPD_target_data:
1384     case OMPD_target_enter_data:
1385     case OMPD_target_exit_data:
1386     case OMPD_taskloop:
1387     case OMPD_taskloop_simd:
1388     case OMPD_master_taskloop:
1389     case OMPD_master_taskloop_simd:
1390     case OMPD_parallel_master_taskloop:
1391     case OMPD_parallel_master_taskloop_simd:
1392     case OMPD_distribute:
1393     case OMPD_target_update:
1394     case OMPD_distribute_parallel_for_simd:
1395     case OMPD_distribute_simd:
1396     case OMPD_target_parallel_for_simd:
1397     case OMPD_target_simd:
1398     case OMPD_teams_distribute:
1399     case OMPD_teams_distribute_simd:
1400     case OMPD_teams_distribute_parallel_for_simd:
1401     case OMPD_target_teams:
1402     case OMPD_target_teams_distribute:
1403     case OMPD_target_teams_distribute_parallel_for_simd:
1404     case OMPD_target_teams_distribute_simd:
1405     case OMPD_declare_target:
1406     case OMPD_end_declare_target:
1407     case OMPD_threadprivate:
1408     case OMPD_allocate:
1409     case OMPD_declare_reduction:
1410     case OMPD_declare_mapper:
1411     case OMPD_declare_simd:
1412     case OMPD_requires:
1413     case OMPD_declare_variant:
1414     case OMPD_begin_declare_variant:
1415     case OMPD_end_declare_variant:
1416     case OMPD_unknown:
1417     default:
1418       llvm_unreachable("Enexpected directive with task reductions.");
1419     }
1420 
1421     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1422     EmitVarDecl(*VD);
1423     EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1424                       /*Volatile=*/false, TaskRedRef->getType());
1425   }
1426 }
1427 
1428 void CodeGenFunction::EmitOMPReductionClauseFinal(
1429     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1430   if (!HaveInsertPoint())
1431     return;
1432   llvm::SmallVector<const Expr *, 8> Privates;
1433   llvm::SmallVector<const Expr *, 8> LHSExprs;
1434   llvm::SmallVector<const Expr *, 8> RHSExprs;
1435   llvm::SmallVector<const Expr *, 8> ReductionOps;
1436   bool HasAtLeastOneReduction = false;
1437   bool IsReductionWithTaskMod = false;
1438   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1439     // Do not emit for inscan reductions.
1440     if (C->getModifier() == OMPC_REDUCTION_inscan)
1441       continue;
1442     HasAtLeastOneReduction = true;
1443     Privates.append(C->privates().begin(), C->privates().end());
1444     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1445     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1446     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1447     IsReductionWithTaskMod =
1448         IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1449   }
1450   if (HasAtLeastOneReduction) {
1451     if (IsReductionWithTaskMod) {
1452       CGM.getOpenMPRuntime().emitTaskReductionFini(
1453           *this, D.getBeginLoc(),
1454           isOpenMPWorksharingDirective(D.getDirectiveKind()));
1455     }
1456     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1457                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1458                       ReductionKind == OMPD_simd;
1459     bool SimpleReduction = ReductionKind == OMPD_simd;
1460     // Emit nowait reduction if nowait clause is present or directive is a
1461     // parallel directive (it always has implicit barrier).
1462     CGM.getOpenMPRuntime().emitReduction(
1463         *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1464         {WithNowait, SimpleReduction, ReductionKind});
1465   }
1466 }
1467 
1468 static void emitPostUpdateForReductionClause(
1469     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1470     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1471   if (!CGF.HaveInsertPoint())
1472     return;
1473   llvm::BasicBlock *DoneBB = nullptr;
1474   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1475     if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1476       if (!DoneBB) {
1477         if (llvm::Value *Cond = CondGen(CGF)) {
1478           // If the first post-update expression is found, emit conditional
1479           // block if it was requested.
1480           llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1481           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1482           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1483           CGF.EmitBlock(ThenBB);
1484         }
1485       }
1486       CGF.EmitIgnoredExpr(PostUpdate);
1487     }
1488   }
1489   if (DoneBB)
1490     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1491 }
1492 
1493 namespace {
1494 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1495 /// parallel function. This is necessary for combined constructs such as
1496 /// 'distribute parallel for'
1497 typedef llvm::function_ref<void(CodeGenFunction &,
1498                                 const OMPExecutableDirective &,
1499                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1500     CodeGenBoundParametersTy;
1501 } // anonymous namespace
1502 
1503 static void
1504 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1505                                      const OMPExecutableDirective &S) {
1506   if (CGF.getLangOpts().OpenMP < 50)
1507     return;
1508   llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1509   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1510     for (const Expr *Ref : C->varlists()) {
1511       if (!Ref->getType()->isScalarType())
1512         continue;
1513       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1514       if (!DRE)
1515         continue;
1516       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1517       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1518     }
1519   }
1520   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1521     for (const Expr *Ref : C->varlists()) {
1522       if (!Ref->getType()->isScalarType())
1523         continue;
1524       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1525       if (!DRE)
1526         continue;
1527       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1528       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1529     }
1530   }
1531   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1532     for (const Expr *Ref : C->varlists()) {
1533       if (!Ref->getType()->isScalarType())
1534         continue;
1535       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1536       if (!DRE)
1537         continue;
1538       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1539       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1540     }
1541   }
1542   // Privates should ne analyzed since they are not captured at all.
1543   // Task reductions may be skipped - tasks are ignored.
1544   // Firstprivates do not return value but may be passed by reference - no need
1545   // to check for updated lastprivate conditional.
1546   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1547     for (const Expr *Ref : C->varlists()) {
1548       if (!Ref->getType()->isScalarType())
1549         continue;
1550       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1551       if (!DRE)
1552         continue;
1553       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1554     }
1555   }
1556   CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1557       CGF, S, PrivateDecls);
1558 }
1559 
1560 static void emitCommonOMPParallelDirective(
1561     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1562     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1563     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1564   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1565   llvm::Value *NumThreads = nullptr;
1566   llvm::Function *OutlinedFn =
1567       CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1568           S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1569   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1570     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1571     NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1572                                     /*IgnoreResultAssign=*/true);
1573     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1574         CGF, NumThreads, NumThreadsClause->getBeginLoc());
1575   }
1576   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1577     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1578     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1579         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1580   }
1581   const Expr *IfCond = nullptr;
1582   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1583     if (C->getNameModifier() == OMPD_unknown ||
1584         C->getNameModifier() == OMPD_parallel) {
1585       IfCond = C->getCondition();
1586       break;
1587     }
1588   }
1589 
1590   OMPParallelScope Scope(CGF, S);
1591   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1592   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1593   // lower and upper bounds with the pragma 'for' chunking mechanism.
1594   // The following lambda takes care of appending the lower and upper bound
1595   // parameters when necessary
1596   CodeGenBoundParameters(CGF, S, CapturedVars);
1597   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1598   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1599                                               CapturedVars, IfCond, NumThreads);
1600 }
1601 
1602 static bool isAllocatableDecl(const VarDecl *VD) {
1603   const VarDecl *CVD = VD->getCanonicalDecl();
1604   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1605     return false;
1606   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1607   // Use the default allocation.
1608   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1609             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1610            !AA->getAllocator());
1611 }
1612 
1613 static void emitEmptyBoundParameters(CodeGenFunction &,
1614                                      const OMPExecutableDirective &,
1615                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1616 
1617 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1618     CodeGenFunction &CGF, const VarDecl *VD) {
1619   CodeGenModule &CGM = CGF.CGM;
1620   auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1621 
1622   if (!VD)
1623     return Address::invalid();
1624   const VarDecl *CVD = VD->getCanonicalDecl();
1625   if (!isAllocatableDecl(CVD))
1626     return Address::invalid();
1627   llvm::Value *Size;
1628   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1629   if (CVD->getType()->isVariablyModifiedType()) {
1630     Size = CGF.getTypeSize(CVD->getType());
1631     // Align the size: ((size + align - 1) / align) * align
1632     Size = CGF.Builder.CreateNUWAdd(
1633         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1634     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1635     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1636   } else {
1637     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1638     Size = CGM.getSize(Sz.alignTo(Align));
1639   }
1640 
1641   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1642   assert(AA->getAllocator() &&
1643          "Expected allocator expression for non-default allocator.");
1644   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1645   // According to the standard, the original allocator type is a enum (integer).
1646   // Convert to pointer type, if required.
1647   if (Allocator->getType()->isIntegerTy())
1648     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1649   else if (Allocator->getType()->isPointerTy())
1650     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1651                                                                 CGM.VoidPtrTy);
1652 
1653   llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1654       CGF.Builder, Size, Allocator,
1655       getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1656   llvm::CallInst *FreeCI =
1657       OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1658 
1659   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1660   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1661       Addr,
1662       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1663       getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1664   return Address::deprecated(Addr, Align);
1665 }
1666 
1667 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1668     CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1669     SourceLocation Loc) {
1670   CodeGenModule &CGM = CGF.CGM;
1671   if (CGM.getLangOpts().OpenMPUseTLS &&
1672       CGM.getContext().getTargetInfo().isTLSSupported())
1673     return VDAddr;
1674 
1675   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1676 
1677   llvm::Type *VarTy = VDAddr.getElementType();
1678   llvm::Value *Data =
1679       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1680   llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1681   std::string Suffix = getNameWithSeparators({"cache", ""});
1682   llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1683 
1684   llvm::CallInst *ThreadPrivateCacheCall =
1685       OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1686 
1687   return Address::deprecated(ThreadPrivateCacheCall, VDAddr.getAlignment());
1688 }
1689 
1690 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1691     ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1692   SmallString<128> Buffer;
1693   llvm::raw_svector_ostream OS(Buffer);
1694   StringRef Sep = FirstSeparator;
1695   for (StringRef Part : Parts) {
1696     OS << Sep << Part;
1697     Sep = Separator;
1698   }
1699   return OS.str().str();
1700 }
1701 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1702   if (CGM.getLangOpts().OpenMPIRBuilder) {
1703     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1704     // Check if we have any if clause associated with the directive.
1705     llvm::Value *IfCond = nullptr;
1706     if (const auto *C = S.getSingleClause<OMPIfClause>())
1707       IfCond = EmitScalarExpr(C->getCondition(),
1708                               /*IgnoreResultAssign=*/true);
1709 
1710     llvm::Value *NumThreads = nullptr;
1711     if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1712       NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1713                                   /*IgnoreResultAssign=*/true);
1714 
1715     ProcBindKind ProcBind = OMP_PROC_BIND_default;
1716     if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1717       ProcBind = ProcBindClause->getProcBindKind();
1718 
1719     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1720 
1721     // The cleanup callback that finalizes all variabels at the given location,
1722     // thus calls destructors etc.
1723     auto FiniCB = [this](InsertPointTy IP) {
1724       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1725     };
1726 
1727     // Privatization callback that performs appropriate action for
1728     // shared/private/firstprivate/lastprivate/copyin/... variables.
1729     //
1730     // TODO: This defaults to shared right now.
1731     auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1732                      llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1733       // The next line is appropriate only for variables (Val) with the
1734       // data-sharing attribute "shared".
1735       ReplVal = &Val;
1736 
1737       return CodeGenIP;
1738     };
1739 
1740     const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1741     const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1742 
1743     auto BodyGenCB = [ParallelRegionBodyStmt,
1744                       this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1745                             llvm::BasicBlock &ContinuationBB) {
1746       OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1747                                                       ContinuationBB);
1748       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1749                                              CodeGenIP, ContinuationBB);
1750     };
1751 
1752     CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1753     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1754     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1755         AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1756     Builder.restoreIP(
1757         OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1758                                   IfCond, NumThreads, ProcBind, S.hasCancel()));
1759     return;
1760   }
1761 
1762   // Emit parallel region as a standalone region.
1763   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1764     Action.Enter(CGF);
1765     OMPPrivateScope PrivateScope(CGF);
1766     bool Copyins = CGF.EmitOMPCopyinClause(S);
1767     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1768     if (Copyins) {
1769       // Emit implicit barrier to synchronize threads and avoid data races on
1770       // propagation master's thread values of threadprivate variables to local
1771       // instances of that variables of all other implicit threads.
1772       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1773           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1774           /*ForceSimpleCall=*/true);
1775     }
1776     CGF.EmitOMPPrivateClause(S, PrivateScope);
1777     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1778     (void)PrivateScope.Privatize();
1779     CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1780     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1781   };
1782   {
1783     auto LPCRegion =
1784         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1785     emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1786                                    emitEmptyBoundParameters);
1787     emitPostUpdateForReductionClause(*this, S,
1788                                      [](CodeGenFunction &) { return nullptr; });
1789   }
1790   // Check for outer lastprivate conditional update.
1791   checkForLastprivateConditionalUpdate(*this, S);
1792 }
1793 
1794 void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1795   EmitStmt(S.getIfStmt());
1796 }
1797 
1798 namespace {
1799 /// RAII to handle scopes for loop transformation directives.
1800 class OMPTransformDirectiveScopeRAII {
1801   OMPLoopScope *Scope = nullptr;
1802   CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1803   CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1804 
1805 public:
1806   OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1807     if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1808       Scope = new OMPLoopScope(CGF, *Dir);
1809       CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1810       CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1811     }
1812   }
1813   ~OMPTransformDirectiveScopeRAII() {
1814     if (!Scope)
1815       return;
1816     delete CapInfoRAII;
1817     delete CGSI;
1818     delete Scope;
1819   }
1820 };
1821 } // namespace
1822 
1823 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1824                      int MaxLevel, int Level = 0) {
1825   assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1826   const Stmt *SimplifiedS = S->IgnoreContainers();
1827   if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1828     PrettyStackTraceLoc CrashInfo(
1829         CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1830         "LLVM IR generation of compound statement ('{}')");
1831 
1832     // Keep track of the current cleanup stack depth, including debug scopes.
1833     CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1834     for (const Stmt *CurStmt : CS->body())
1835       emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1836     return;
1837   }
1838   if (SimplifiedS == NextLoop) {
1839     if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
1840       SimplifiedS = Dir->getTransformedStmt();
1841     if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1842       SimplifiedS = CanonLoop->getLoopStmt();
1843     if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1844       S = For->getBody();
1845     } else {
1846       assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1847              "Expected canonical for loop or range-based for loop.");
1848       const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1849       CGF.EmitStmt(CXXFor->getLoopVarStmt());
1850       S = CXXFor->getBody();
1851     }
1852     if (Level + 1 < MaxLevel) {
1853       NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1854           S, /*TryImperfectlyNestedLoops=*/true);
1855       emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1856       return;
1857     }
1858   }
1859   CGF.EmitStmt(S);
1860 }
1861 
1862 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1863                                       JumpDest LoopExit) {
1864   RunCleanupsScope BodyScope(*this);
1865   // Update counters values on current iteration.
1866   for (const Expr *UE : D.updates())
1867     EmitIgnoredExpr(UE);
1868   // Update the linear variables.
1869   // In distribute directives only loop counters may be marked as linear, no
1870   // need to generate the code for them.
1871   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1872     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1873       for (const Expr *UE : C->updates())
1874         EmitIgnoredExpr(UE);
1875     }
1876   }
1877 
1878   // On a continue in the body, jump to the end.
1879   JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1880   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1881   for (const Expr *E : D.finals_conditions()) {
1882     if (!E)
1883       continue;
1884     // Check that loop counter in non-rectangular nest fits into the iteration
1885     // space.
1886     llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1887     EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1888                          getProfileCount(D.getBody()));
1889     EmitBlock(NextBB);
1890   }
1891 
1892   OMPPrivateScope InscanScope(*this);
1893   EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1894   bool IsInscanRegion = InscanScope.Privatize();
1895   if (IsInscanRegion) {
1896     // Need to remember the block before and after scan directive
1897     // to dispatch them correctly depending on the clause used in
1898     // this directive, inclusive or exclusive. For inclusive scan the natural
1899     // order of the blocks is used, for exclusive clause the blocks must be
1900     // executed in reverse order.
1901     OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1902     OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1903     // No need to allocate inscan exit block, in simd mode it is selected in the
1904     // codegen for the scan directive.
1905     if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1906       OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1907     OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1908     EmitBranch(OMPScanDispatch);
1909     EmitBlock(OMPBeforeScanBlock);
1910   }
1911 
1912   // Emit loop variables for C++ range loops.
1913   const Stmt *Body =
1914       D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1915   // Emit loop body.
1916   emitBody(*this, Body,
1917            OMPLoopBasedDirective::tryToFindNextInnerLoop(
1918                Body, /*TryImperfectlyNestedLoops=*/true),
1919            D.getLoopsNumber());
1920 
1921   // Jump to the dispatcher at the end of the loop body.
1922   if (IsInscanRegion)
1923     EmitBranch(OMPScanExitBlock);
1924 
1925   // The end (updates/cleanups).
1926   EmitBlock(Continue.getBlock());
1927   BreakContinueStack.pop_back();
1928 }
1929 
1930 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1931 
1932 /// Emit a captured statement and return the function as well as its captured
1933 /// closure context.
1934 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1935                                              const CapturedStmt *S) {
1936   LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1937   CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1938   std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1939       std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1940   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1941   llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1942 
1943   return {F, CapStruct.getPointer(ParentCGF)};
1944 }
1945 
1946 /// Emit a call to a previously captured closure.
1947 static llvm::CallInst *
1948 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1949                      llvm::ArrayRef<llvm::Value *> Args) {
1950   // Append the closure context to the argument.
1951   SmallVector<llvm::Value *> EffectiveArgs;
1952   EffectiveArgs.reserve(Args.size() + 1);
1953   llvm::append_range(EffectiveArgs, Args);
1954   EffectiveArgs.push_back(Cap.second);
1955 
1956   return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1957 }
1958 
1959 llvm::CanonicalLoopInfo *
1960 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1961   assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1962 
1963   // The caller is processing the loop-associated directive processing the \p
1964   // Depth loops nested in \p S. Put the previous pending loop-associated
1965   // directive to the stack. If the current loop-associated directive is a loop
1966   // transformation directive, it will push its generated loops onto the stack
1967   // such that together with the loops left here they form the combined loop
1968   // nest for the parent loop-associated directive.
1969   int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
1970   ExpectedOMPLoopDepth = Depth;
1971 
1972   EmitStmt(S);
1973   assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1974 
1975   // The last added loop is the outermost one.
1976   llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
1977 
1978   // Pop the \p Depth loops requested by the call from that stack and restore
1979   // the previous context.
1980   OMPLoopNestStack.pop_back_n(Depth);
1981   ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
1982 
1983   return Result;
1984 }
1985 
1986 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
1987   const Stmt *SyntacticalLoop = S->getLoopStmt();
1988   if (!getLangOpts().OpenMPIRBuilder) {
1989     // Ignore if OpenMPIRBuilder is not enabled.
1990     EmitStmt(SyntacticalLoop);
1991     return;
1992   }
1993 
1994   LexicalScope ForScope(*this, S->getSourceRange());
1995 
1996   // Emit init statements. The Distance/LoopVar funcs may reference variable
1997   // declarations they contain.
1998   const Stmt *BodyStmt;
1999   if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
2000     if (const Stmt *InitStmt = For->getInit())
2001       EmitStmt(InitStmt);
2002     BodyStmt = For->getBody();
2003   } else if (const auto *RangeFor =
2004                  dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
2005     if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2006       EmitStmt(RangeStmt);
2007     if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2008       EmitStmt(BeginStmt);
2009     if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2010       EmitStmt(EndStmt);
2011     if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2012       EmitStmt(LoopVarStmt);
2013     BodyStmt = RangeFor->getBody();
2014   } else
2015     llvm_unreachable("Expected for-stmt or range-based for-stmt");
2016 
2017   // Emit closure for later use. By-value captures will be captured here.
2018   const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2019   EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
2020   const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2021   EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
2022 
2023   // Call the distance function to get the number of iterations of the loop to
2024   // come.
2025   QualType LogicalTy = DistanceFunc->getCapturedDecl()
2026                            ->getParam(0)
2027                            ->getType()
2028                            .getNonReferenceType();
2029   Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
2030   emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
2031   llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
2032 
2033   // Emit the loop structure.
2034   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2035   auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2036                            llvm::Value *IndVar) {
2037     Builder.restoreIP(CodeGenIP);
2038 
2039     // Emit the loop body: Convert the logical iteration number to the loop
2040     // variable and emit the body.
2041     const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2042     LValue LCVal = EmitLValue(LoopVarRef);
2043     Address LoopVarAddress = LCVal.getAddress(*this);
2044     emitCapturedStmtCall(*this, LoopVarClosure,
2045                          {LoopVarAddress.getPointer(), IndVar});
2046 
2047     RunCleanupsScope BodyScope(*this);
2048     EmitStmt(BodyStmt);
2049   };
2050   llvm::CanonicalLoopInfo *CL =
2051       OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2052 
2053   // Finish up the loop.
2054   Builder.restoreIP(CL->getAfterIP());
2055   ForScope.ForceCleanup();
2056 
2057   // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2058   OMPLoopNestStack.push_back(CL);
2059 }
2060 
2061 void CodeGenFunction::EmitOMPInnerLoop(
2062     const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2063     const Expr *IncExpr,
2064     const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2065     const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2066   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2067 
2068   // Start the loop with a block that tests the condition.
2069   auto CondBlock = createBasicBlock("omp.inner.for.cond");
2070   EmitBlock(CondBlock);
2071   const SourceRange R = S.getSourceRange();
2072 
2073   // If attributes are attached, push to the basic block with them.
2074   const auto &OMPED = cast<OMPExecutableDirective>(S);
2075   const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2076   const Stmt *SS = ICS->getCapturedStmt();
2077   const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2078   OMPLoopNestStack.clear();
2079   if (AS)
2080     LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2081                    AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2082                    SourceLocToDebugLoc(R.getEnd()));
2083   else
2084     LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2085                    SourceLocToDebugLoc(R.getEnd()));
2086 
2087   // If there are any cleanups between here and the loop-exit scope,
2088   // create a block to stage a loop exit along.
2089   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2090   if (RequiresCleanup)
2091     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2092 
2093   llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2094 
2095   // Emit condition.
2096   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2097   if (ExitBlock != LoopExit.getBlock()) {
2098     EmitBlock(ExitBlock);
2099     EmitBranchThroughCleanup(LoopExit);
2100   }
2101 
2102   EmitBlock(LoopBody);
2103   incrementProfileCounter(&S);
2104 
2105   // Create a block for the increment.
2106   JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2107   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2108 
2109   BodyGen(*this);
2110 
2111   // Emit "IV = IV + 1" and a back-edge to the condition block.
2112   EmitBlock(Continue.getBlock());
2113   EmitIgnoredExpr(IncExpr);
2114   PostIncGen(*this);
2115   BreakContinueStack.pop_back();
2116   EmitBranch(CondBlock);
2117   LoopStack.pop();
2118   // Emit the fall-through block.
2119   EmitBlock(LoopExit.getBlock());
2120 }
2121 
2122 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2123   if (!HaveInsertPoint())
2124     return false;
2125   // Emit inits for the linear variables.
2126   bool HasLinears = false;
2127   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2128     for (const Expr *Init : C->inits()) {
2129       HasLinears = true;
2130       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2131       if (const auto *Ref =
2132               dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2133         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2134         const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2135         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2136                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
2137                         VD->getInit()->getType(), VK_LValue,
2138                         VD->getInit()->getExprLoc());
2139         EmitExprAsInit(
2140             &DRE, VD,
2141             MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
2142             /*capturedByInit=*/false);
2143         EmitAutoVarCleanups(Emission);
2144       } else {
2145         EmitVarDecl(*VD);
2146       }
2147     }
2148     // Emit the linear steps for the linear clauses.
2149     // If a step is not constant, it is pre-calculated before the loop.
2150     if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2151       if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2152         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2153         // Emit calculation of the linear step.
2154         EmitIgnoredExpr(CS);
2155       }
2156   }
2157   return HasLinears;
2158 }
2159 
2160 void CodeGenFunction::EmitOMPLinearClauseFinal(
2161     const OMPLoopDirective &D,
2162     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2163   if (!HaveInsertPoint())
2164     return;
2165   llvm::BasicBlock *DoneBB = nullptr;
2166   // Emit the final values of the linear variables.
2167   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2168     auto IC = C->varlist_begin();
2169     for (const Expr *F : C->finals()) {
2170       if (!DoneBB) {
2171         if (llvm::Value *Cond = CondGen(*this)) {
2172           // If the first post-update expression is found, emit conditional
2173           // block if it was requested.
2174           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2175           DoneBB = createBasicBlock(".omp.linear.pu.done");
2176           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2177           EmitBlock(ThenBB);
2178         }
2179       }
2180       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2181       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2182                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
2183                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2184       Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2185       CodeGenFunction::OMPPrivateScope VarScope(*this);
2186       VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2187       (void)VarScope.Privatize();
2188       EmitIgnoredExpr(F);
2189       ++IC;
2190     }
2191     if (const Expr *PostUpdate = C->getPostUpdateExpr())
2192       EmitIgnoredExpr(PostUpdate);
2193   }
2194   if (DoneBB)
2195     EmitBlock(DoneBB, /*IsFinished=*/true);
2196 }
2197 
2198 static void emitAlignedClause(CodeGenFunction &CGF,
2199                               const OMPExecutableDirective &D) {
2200   if (!CGF.HaveInsertPoint())
2201     return;
2202   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2203     llvm::APInt ClauseAlignment(64, 0);
2204     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2205       auto *AlignmentCI =
2206           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2207       ClauseAlignment = AlignmentCI->getValue();
2208     }
2209     for (const Expr *E : Clause->varlists()) {
2210       llvm::APInt Alignment(ClauseAlignment);
2211       if (Alignment == 0) {
2212         // OpenMP [2.8.1, Description]
2213         // If no optional parameter is specified, implementation-defined default
2214         // alignments for SIMD instructions on the target platforms are assumed.
2215         Alignment =
2216             CGF.getContext()
2217                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2218                     E->getType()->getPointeeType()))
2219                 .getQuantity();
2220       }
2221       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2222              "alignment is not power of 2");
2223       if (Alignment != 0) {
2224         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2225         CGF.emitAlignmentAssumption(
2226             PtrValue, E, /*No second loc needed*/ SourceLocation(),
2227             llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2228       }
2229     }
2230   }
2231 }
2232 
2233 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2234     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2235   if (!HaveInsertPoint())
2236     return;
2237   auto I = S.private_counters().begin();
2238   for (const Expr *E : S.counters()) {
2239     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2240     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2241     // Emit var without initialization.
2242     AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2243     EmitAutoVarCleanups(VarEmission);
2244     LocalDeclMap.erase(PrivateVD);
2245     (void)LoopScope.addPrivate(
2246         VD, [&VarEmission]() { return VarEmission.getAllocatedAddress(); });
2247     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2248         VD->hasGlobalStorage()) {
2249       (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
2250         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2251                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2252                         E->getType(), VK_LValue, E->getExprLoc());
2253         return EmitLValue(&DRE).getAddress(*this);
2254       });
2255     } else {
2256       (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
2257         return VarEmission.getAllocatedAddress();
2258       });
2259     }
2260     ++I;
2261   }
2262   // Privatize extra loop counters used in loops for ordered(n) clauses.
2263   for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2264     if (!C->getNumForLoops())
2265       continue;
2266     for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2267          I < E; ++I) {
2268       const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2269       const auto *VD = cast<VarDecl>(DRE->getDecl());
2270       // Override only those variables that can be captured to avoid re-emission
2271       // of the variables declared within the loops.
2272       if (DRE->refersToEnclosingVariableOrCapture()) {
2273         (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
2274           return CreateMemTemp(DRE->getType(), VD->getName());
2275         });
2276       }
2277     }
2278   }
2279 }
2280 
2281 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2282                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
2283                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2284   if (!CGF.HaveInsertPoint())
2285     return;
2286   {
2287     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2288     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2289     (void)PreCondScope.Privatize();
2290     // Get initial values of real counters.
2291     for (const Expr *I : S.inits()) {
2292       CGF.EmitIgnoredExpr(I);
2293     }
2294   }
2295   // Create temp loop control variables with their init values to support
2296   // non-rectangular loops.
2297   CodeGenFunction::OMPMapVars PreCondVars;
2298   for (const Expr *E : S.dependent_counters()) {
2299     if (!E)
2300       continue;
2301     assert(!E->getType().getNonReferenceType()->isRecordType() &&
2302            "dependent counter must not be an iterator.");
2303     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2304     Address CounterAddr =
2305         CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2306     (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2307   }
2308   (void)PreCondVars.apply(CGF);
2309   for (const Expr *E : S.dependent_inits()) {
2310     if (!E)
2311       continue;
2312     CGF.EmitIgnoredExpr(E);
2313   }
2314   // Check that loop is executed at least one time.
2315   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2316   PreCondVars.restore(CGF);
2317 }
2318 
2319 void CodeGenFunction::EmitOMPLinearClause(
2320     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2321   if (!HaveInsertPoint())
2322     return;
2323   llvm::DenseSet<const VarDecl *> SIMDLCVs;
2324   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2325     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2326     for (const Expr *C : LoopDirective->counters()) {
2327       SIMDLCVs.insert(
2328           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2329     }
2330   }
2331   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2332     auto CurPrivate = C->privates().begin();
2333     for (const Expr *E : C->varlists()) {
2334       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2335       const auto *PrivateVD =
2336           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2337       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2338         bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2339           // Emit private VarDecl with copy init.
2340           EmitVarDecl(*PrivateVD);
2341           return GetAddrOfLocalVar(PrivateVD);
2342         });
2343         assert(IsRegistered && "linear var already registered as private");
2344         // Silence the warning about unused variable.
2345         (void)IsRegistered;
2346       } else {
2347         EmitVarDecl(*PrivateVD);
2348       }
2349       ++CurPrivate;
2350     }
2351   }
2352 }
2353 
2354 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2355                                      const OMPExecutableDirective &D) {
2356   if (!CGF.HaveInsertPoint())
2357     return;
2358   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2359     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2360                                  /*ignoreResult=*/true);
2361     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2362     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2363     // In presence of finite 'safelen', it may be unsafe to mark all
2364     // the memory instructions parallel, because loop-carried
2365     // dependences of 'safelen' iterations are possible.
2366     CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2367   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2368     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2369                                  /*ignoreResult=*/true);
2370     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2371     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2372     // In presence of finite 'safelen', it may be unsafe to mark all
2373     // the memory instructions parallel, because loop-carried
2374     // dependences of 'safelen' iterations are possible.
2375     CGF.LoopStack.setParallel(/*Enable=*/false);
2376   }
2377 }
2378 
2379 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2380   // Walk clauses and process safelen/lastprivate.
2381   LoopStack.setParallel(/*Enable=*/true);
2382   LoopStack.setVectorizeEnable();
2383   emitSimdlenSafelenClause(*this, D);
2384   if (const auto *C = D.getSingleClause<OMPOrderClause>())
2385     if (C->getKind() == OMPC_ORDER_concurrent)
2386       LoopStack.setParallel(/*Enable=*/true);
2387   if ((D.getDirectiveKind() == OMPD_simd ||
2388        (getLangOpts().OpenMPSimd &&
2389         isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2390       llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2391                    [](const OMPReductionClause *C) {
2392                      return C->getModifier() == OMPC_REDUCTION_inscan;
2393                    }))
2394     // Disable parallel access in case of prefix sum.
2395     LoopStack.setParallel(/*Enable=*/false);
2396 }
2397 
2398 void CodeGenFunction::EmitOMPSimdFinal(
2399     const OMPLoopDirective &D,
2400     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2401   if (!HaveInsertPoint())
2402     return;
2403   llvm::BasicBlock *DoneBB = nullptr;
2404   auto IC = D.counters().begin();
2405   auto IPC = D.private_counters().begin();
2406   for (const Expr *F : D.finals()) {
2407     const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2408     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2409     const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2410     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2411         OrigVD->hasGlobalStorage() || CED) {
2412       if (!DoneBB) {
2413         if (llvm::Value *Cond = CondGen(*this)) {
2414           // If the first post-update expression is found, emit conditional
2415           // block if it was requested.
2416           llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2417           DoneBB = createBasicBlock(".omp.final.done");
2418           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2419           EmitBlock(ThenBB);
2420         }
2421       }
2422       Address OrigAddr = Address::invalid();
2423       if (CED) {
2424         OrigAddr =
2425             EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2426       } else {
2427         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2428                         /*RefersToEnclosingVariableOrCapture=*/false,
2429                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2430         OrigAddr = EmitLValue(&DRE).getAddress(*this);
2431       }
2432       OMPPrivateScope VarScope(*this);
2433       VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2434       (void)VarScope.Privatize();
2435       EmitIgnoredExpr(F);
2436     }
2437     ++IC;
2438     ++IPC;
2439   }
2440   if (DoneBB)
2441     EmitBlock(DoneBB, /*IsFinished=*/true);
2442 }
2443 
2444 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2445                                          const OMPLoopDirective &S,
2446                                          CodeGenFunction::JumpDest LoopExit) {
2447   CGF.EmitOMPLoopBody(S, LoopExit);
2448   CGF.EmitStopPoint(&S);
2449 }
2450 
2451 /// Emit a helper variable and return corresponding lvalue.
2452 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2453                                const DeclRefExpr *Helper) {
2454   auto VDecl = cast<VarDecl>(Helper->getDecl());
2455   CGF.EmitVarDecl(*VDecl);
2456   return CGF.EmitLValue(Helper);
2457 }
2458 
2459 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2460                                const RegionCodeGenTy &SimdInitGen,
2461                                const RegionCodeGenTy &BodyCodeGen) {
2462   auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2463                                                     PrePostActionTy &) {
2464     CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2465     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2466     SimdInitGen(CGF);
2467 
2468     BodyCodeGen(CGF);
2469   };
2470   auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2471     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2472     CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2473 
2474     BodyCodeGen(CGF);
2475   };
2476   const Expr *IfCond = nullptr;
2477   if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2478     for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2479       if (CGF.getLangOpts().OpenMP >= 50 &&
2480           (C->getNameModifier() == OMPD_unknown ||
2481            C->getNameModifier() == OMPD_simd)) {
2482         IfCond = C->getCondition();
2483         break;
2484       }
2485     }
2486   }
2487   if (IfCond) {
2488     CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2489   } else {
2490     RegionCodeGenTy ThenRCG(ThenGen);
2491     ThenRCG(CGF);
2492   }
2493 }
2494 
2495 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2496                               PrePostActionTy &Action) {
2497   Action.Enter(CGF);
2498   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2499          "Expected simd directive");
2500   OMPLoopScope PreInitScope(CGF, S);
2501   // if (PreCond) {
2502   //   for (IV in 0..LastIteration) BODY;
2503   //   <Final counter/linear vars updates>;
2504   // }
2505   //
2506   if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2507       isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2508       isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2509     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2510     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2511   }
2512 
2513   // Emit: if (PreCond) - begin.
2514   // If the condition constant folds and can be elided, avoid emitting the
2515   // whole loop.
2516   bool CondConstant;
2517   llvm::BasicBlock *ContBlock = nullptr;
2518   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2519     if (!CondConstant)
2520       return;
2521   } else {
2522     llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2523     ContBlock = CGF.createBasicBlock("simd.if.end");
2524     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2525                 CGF.getProfileCount(&S));
2526     CGF.EmitBlock(ThenBlock);
2527     CGF.incrementProfileCounter(&S);
2528   }
2529 
2530   // Emit the loop iteration variable.
2531   const Expr *IVExpr = S.getIterationVariable();
2532   const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2533   CGF.EmitVarDecl(*IVDecl);
2534   CGF.EmitIgnoredExpr(S.getInit());
2535 
2536   // Emit the iterations count variable.
2537   // If it is not a variable, Sema decided to calculate iterations count on
2538   // each iteration (e.g., it is foldable into a constant).
2539   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2540     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2541     // Emit calculation of the iterations count.
2542     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2543   }
2544 
2545   emitAlignedClause(CGF, S);
2546   (void)CGF.EmitOMPLinearClauseInit(S);
2547   {
2548     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2549     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2550     CGF.EmitOMPLinearClause(S, LoopScope);
2551     CGF.EmitOMPPrivateClause(S, LoopScope);
2552     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2553     CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2554         CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2555     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2556     (void)LoopScope.Privatize();
2557     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2558       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2559 
2560     emitCommonSimdLoop(
2561         CGF, S,
2562         [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2563           CGF.EmitOMPSimdInit(S);
2564         },
2565         [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2566           CGF.EmitOMPInnerLoop(
2567               S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2568               [&S](CodeGenFunction &CGF) {
2569                 emitOMPLoopBodyWithStopPoint(CGF, S,
2570                                              CodeGenFunction::JumpDest());
2571               },
2572               [](CodeGenFunction &) {});
2573         });
2574     CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2575     // Emit final copy of the lastprivate variables at the end of loops.
2576     if (HasLastprivateClause)
2577       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2578     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2579     emitPostUpdateForReductionClause(CGF, S,
2580                                      [](CodeGenFunction &) { return nullptr; });
2581   }
2582   CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2583   // Emit: if (PreCond) - end.
2584   if (ContBlock) {
2585     CGF.EmitBranch(ContBlock);
2586     CGF.EmitBlock(ContBlock, true);
2587   }
2588 }
2589 
2590 static bool isSupportedByOpenMPIRBuilder(const OMPExecutableDirective &S) {
2591   // Check for unsupported clauses
2592   if (!S.clauses().empty()) {
2593     // Currently no clause is supported
2594     return false;
2595   }
2596 
2597   // Check if we have a statement with the ordered directive.
2598   // Visit the statement hierarchy to find a compound statement
2599   // with a ordered directive in it.
2600   if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2601     if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2602       for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2603         if (!SubStmt)
2604           continue;
2605         if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2606           for (const Stmt *CSSubStmt : CS->children()) {
2607             if (!CSSubStmt)
2608               continue;
2609             if (isa<OMPOrderedDirective>(CSSubStmt)) {
2610               return false;
2611             }
2612           }
2613         }
2614       }
2615     }
2616   }
2617   return true;
2618 }
2619 
2620 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2621   bool UseOMPIRBuilder =
2622       CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
2623   if (UseOMPIRBuilder) {
2624     auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
2625                                                           PrePostActionTy &) {
2626       // Use the OpenMPIRBuilder if enabled.
2627       if (UseOMPIRBuilder) {
2628         // Emit the associated statement and get its loop representation.
2629         llvm::DebugLoc DL = SourceLocToDebugLoc(S.getBeginLoc());
2630         const Stmt *Inner = S.getRawStmt();
2631         llvm::CanonicalLoopInfo *CLI =
2632             EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2633 
2634         llvm::OpenMPIRBuilder &OMPBuilder =
2635             CGM.getOpenMPRuntime().getOMPBuilder();
2636         // Add SIMD specific metadata
2637         OMPBuilder.applySimd(DL, CLI);
2638         return;
2639       }
2640     };
2641     {
2642       auto LPCRegion =
2643           CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2644       OMPLexicalScope Scope(*this, S, OMPD_unknown);
2645       CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
2646                                                   CodeGenIRBuilder);
2647     }
2648     return;
2649   }
2650 
2651   ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2652   OMPFirstScanLoop = true;
2653   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2654     emitOMPSimdRegion(CGF, S, Action);
2655   };
2656   {
2657     auto LPCRegion =
2658         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2659     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2660     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2661   }
2662   // Check for outer lastprivate conditional update.
2663   checkForLastprivateConditionalUpdate(*this, S);
2664 }
2665 
2666 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2667   // Emit the de-sugared statement.
2668   OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2669   EmitStmt(S.getTransformedStmt());
2670 }
2671 
2672 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2673   bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
2674 
2675   if (UseOMPIRBuilder) {
2676     auto DL = SourceLocToDebugLoc(S.getBeginLoc());
2677     const Stmt *Inner = S.getRawStmt();
2678 
2679     // Consume nested loop. Clear the entire remaining loop stack because a
2680     // fully unrolled loop is non-transformable. For partial unrolling the
2681     // generated outer loop is pushed back to the stack.
2682     llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2683     OMPLoopNestStack.clear();
2684 
2685     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2686 
2687     bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
2688     llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
2689 
2690     if (S.hasClausesOfKind<OMPFullClause>()) {
2691       assert(ExpectedOMPLoopDepth == 0);
2692       OMPBuilder.unrollLoopFull(DL, CLI);
2693     } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2694       uint64_t Factor = 0;
2695       if (Expr *FactorExpr = PartialClause->getFactor()) {
2696         Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2697         assert(Factor >= 1 && "Only positive factors are valid");
2698       }
2699       OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
2700                                    NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
2701     } else {
2702       OMPBuilder.unrollLoopHeuristic(DL, CLI);
2703     }
2704 
2705     assert((!NeedsUnrolledCLI || UnrolledCLI) &&
2706            "NeedsUnrolledCLI implies UnrolledCLI to be set");
2707     if (UnrolledCLI)
2708       OMPLoopNestStack.push_back(UnrolledCLI);
2709 
2710     return;
2711   }
2712 
2713   // This function is only called if the unrolled loop is not consumed by any
2714   // other loop-associated construct. Such a loop-associated construct will have
2715   // used the transformed AST.
2716 
2717   // Set the unroll metadata for the next emitted loop.
2718   LoopStack.setUnrollState(LoopAttributes::Enable);
2719 
2720   if (S.hasClausesOfKind<OMPFullClause>()) {
2721     LoopStack.setUnrollState(LoopAttributes::Full);
2722   } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2723     if (Expr *FactorExpr = PartialClause->getFactor()) {
2724       uint64_t Factor =
2725           FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2726       assert(Factor >= 1 && "Only positive factors are valid");
2727       LoopStack.setUnrollCount(Factor);
2728     }
2729   }
2730 
2731   EmitStmt(S.getAssociatedStmt());
2732 }
2733 
2734 void CodeGenFunction::EmitOMPOuterLoop(
2735     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2736     CodeGenFunction::OMPPrivateScope &LoopScope,
2737     const CodeGenFunction::OMPLoopArguments &LoopArgs,
2738     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2739     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2740   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2741 
2742   const Expr *IVExpr = S.getIterationVariable();
2743   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2744   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2745 
2746   JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2747 
2748   // Start the loop with a block that tests the condition.
2749   llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2750   EmitBlock(CondBlock);
2751   const SourceRange R = S.getSourceRange();
2752   OMPLoopNestStack.clear();
2753   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2754                  SourceLocToDebugLoc(R.getEnd()));
2755 
2756   llvm::Value *BoolCondVal = nullptr;
2757   if (!DynamicOrOrdered) {
2758     // UB = min(UB, GlobalUB) or
2759     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2760     // 'distribute parallel for')
2761     EmitIgnoredExpr(LoopArgs.EUB);
2762     // IV = LB
2763     EmitIgnoredExpr(LoopArgs.Init);
2764     // IV < UB
2765     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2766   } else {
2767     BoolCondVal =
2768         RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2769                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2770   }
2771 
2772   // If there are any cleanups between here and the loop-exit scope,
2773   // create a block to stage a loop exit along.
2774   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2775   if (LoopScope.requiresCleanups())
2776     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2777 
2778   llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2779   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2780   if (ExitBlock != LoopExit.getBlock()) {
2781     EmitBlock(ExitBlock);
2782     EmitBranchThroughCleanup(LoopExit);
2783   }
2784   EmitBlock(LoopBody);
2785 
2786   // Emit "IV = LB" (in case of static schedule, we have already calculated new
2787   // LB for loop condition and emitted it above).
2788   if (DynamicOrOrdered)
2789     EmitIgnoredExpr(LoopArgs.Init);
2790 
2791   // Create a block for the increment.
2792   JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2793   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2794 
2795   emitCommonSimdLoop(
2796       *this, S,
2797       [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2798         // Generate !llvm.loop.parallel metadata for loads and stores for loops
2799         // with dynamic/guided scheduling and without ordered clause.
2800         if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2801           CGF.LoopStack.setParallel(!IsMonotonic);
2802           if (const auto *C = S.getSingleClause<OMPOrderClause>())
2803             if (C->getKind() == OMPC_ORDER_concurrent)
2804               CGF.LoopStack.setParallel(/*Enable=*/true);
2805         } else {
2806           CGF.EmitOMPSimdInit(S);
2807         }
2808       },
2809       [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2810        &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2811         SourceLocation Loc = S.getBeginLoc();
2812         // when 'distribute' is not combined with a 'for':
2813         // while (idx <= UB) { BODY; ++idx; }
2814         // when 'distribute' is combined with a 'for'
2815         // (e.g. 'distribute parallel for')
2816         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2817         CGF.EmitOMPInnerLoop(
2818             S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2819             [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2820               CodeGenLoop(CGF, S, LoopExit);
2821             },
2822             [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2823               CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2824             });
2825       });
2826 
2827   EmitBlock(Continue.getBlock());
2828   BreakContinueStack.pop_back();
2829   if (!DynamicOrOrdered) {
2830     // Emit "LB = LB + Stride", "UB = UB + Stride".
2831     EmitIgnoredExpr(LoopArgs.NextLB);
2832     EmitIgnoredExpr(LoopArgs.NextUB);
2833   }
2834 
2835   EmitBranch(CondBlock);
2836   OMPLoopNestStack.clear();
2837   LoopStack.pop();
2838   // Emit the fall-through block.
2839   EmitBlock(LoopExit.getBlock());
2840 
2841   // Tell the runtime we are done.
2842   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2843     if (!DynamicOrOrdered)
2844       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2845                                                      S.getDirectiveKind());
2846   };
2847   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2848 }
2849 
2850 void CodeGenFunction::EmitOMPForOuterLoop(
2851     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2852     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2853     const OMPLoopArguments &LoopArgs,
2854     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2855   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2856 
2857   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2858   const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);
2859 
2860   assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2861                                             LoopArgs.Chunk != nullptr)) &&
2862          "static non-chunked schedule does not need outer loop");
2863 
2864   // Emit outer loop.
2865   //
2866   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2867   // When schedule(dynamic,chunk_size) is specified, the iterations are
2868   // distributed to threads in the team in chunks as the threads request them.
2869   // Each thread executes a chunk of iterations, then requests another chunk,
2870   // until no chunks remain to be distributed. Each chunk contains chunk_size
2871   // iterations, except for the last chunk to be distributed, which may have
2872   // fewer iterations. When no chunk_size is specified, it defaults to 1.
2873   //
2874   // When schedule(guided,chunk_size) is specified, the iterations are assigned
2875   // to threads in the team in chunks as the executing threads request them.
2876   // Each thread executes a chunk of iterations, then requests another chunk,
2877   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2878   // each chunk is proportional to the number of unassigned iterations divided
2879   // by the number of threads in the team, decreasing to 1. For a chunk_size
2880   // with value k (greater than 1), the size of each chunk is determined in the
2881   // same way, with the restriction that the chunks do not contain fewer than k
2882   // iterations (except for the last chunk to be assigned, which may have fewer
2883   // than k iterations).
2884   //
2885   // When schedule(auto) is specified, the decision regarding scheduling is
2886   // delegated to the compiler and/or runtime system. The programmer gives the
2887   // implementation the freedom to choose any possible mapping of iterations to
2888   // threads in the team.
2889   //
2890   // When schedule(runtime) is specified, the decision regarding scheduling is
2891   // deferred until run time, and the schedule and chunk size are taken from the
2892   // run-sched-var ICV. If the ICV is set to auto, the schedule is
2893   // implementation defined
2894   //
2895   // while(__kmpc_dispatch_next(&LB, &UB)) {
2896   //   idx = LB;
2897   //   while (idx <= UB) { BODY; ++idx;
2898   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2899   //   } // inner loop
2900   // }
2901   //
2902   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2903   // When schedule(static, chunk_size) is specified, iterations are divided into
2904   // chunks of size chunk_size, and the chunks are assigned to the threads in
2905   // the team in a round-robin fashion in the order of the thread number.
2906   //
2907   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2908   //   while (idx <= UB) { BODY; ++idx; } // inner loop
2909   //   LB = LB + ST;
2910   //   UB = UB + ST;
2911   // }
2912   //
2913 
2914   const Expr *IVExpr = S.getIterationVariable();
2915   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2916   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2917 
2918   if (DynamicOrOrdered) {
2919     const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2920         CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2921     llvm::Value *LBVal = DispatchBounds.first;
2922     llvm::Value *UBVal = DispatchBounds.second;
2923     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2924                                                              LoopArgs.Chunk};
2925     RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2926                            IVSigned, Ordered, DipatchRTInputValues);
2927   } else {
2928     CGOpenMPRuntime::StaticRTInput StaticInit(
2929         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2930         LoopArgs.ST, LoopArgs.Chunk);
2931     RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2932                          ScheduleKind, StaticInit);
2933   }
2934 
2935   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2936                                     const unsigned IVSize,
2937                                     const bool IVSigned) {
2938     if (Ordered) {
2939       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2940                                                             IVSigned);
2941     }
2942   };
2943 
2944   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2945                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2946   OuterLoopArgs.IncExpr = S.getInc();
2947   OuterLoopArgs.Init = S.getInit();
2948   OuterLoopArgs.Cond = S.getCond();
2949   OuterLoopArgs.NextLB = S.getNextLowerBound();
2950   OuterLoopArgs.NextUB = S.getNextUpperBound();
2951   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2952                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2953 }
2954 
2955 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2956                              const unsigned IVSize, const bool IVSigned) {}
2957 
2958 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2959     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2960     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2961     const CodeGenLoopTy &CodeGenLoopContent) {
2962 
2963   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2964 
2965   // Emit outer loop.
2966   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
2967   // dynamic
2968   //
2969 
2970   const Expr *IVExpr = S.getIterationVariable();
2971   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2972   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2973 
2974   CGOpenMPRuntime::StaticRTInput StaticInit(
2975       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2976       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2977   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2978 
2979   // for combined 'distribute' and 'for' the increment expression of distribute
2980   // is stored in DistInc. For 'distribute' alone, it is in Inc.
2981   Expr *IncExpr;
2982   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2983     IncExpr = S.getDistInc();
2984   else
2985     IncExpr = S.getInc();
2986 
2987   // this routine is shared by 'omp distribute parallel for' and
2988   // 'omp distribute': select the right EUB expression depending on the
2989   // directive
2990   OMPLoopArguments OuterLoopArgs;
2991   OuterLoopArgs.LB = LoopArgs.LB;
2992   OuterLoopArgs.UB = LoopArgs.UB;
2993   OuterLoopArgs.ST = LoopArgs.ST;
2994   OuterLoopArgs.IL = LoopArgs.IL;
2995   OuterLoopArgs.Chunk = LoopArgs.Chunk;
2996   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2997                           ? S.getCombinedEnsureUpperBound()
2998                           : S.getEnsureUpperBound();
2999   OuterLoopArgs.IncExpr = IncExpr;
3000   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3001                            ? S.getCombinedInit()
3002                            : S.getInit();
3003   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3004                            ? S.getCombinedCond()
3005                            : S.getCond();
3006   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3007                              ? S.getCombinedNextLowerBound()
3008                              : S.getNextLowerBound();
3009   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3010                              ? S.getCombinedNextUpperBound()
3011                              : S.getNextUpperBound();
3012 
3013   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3014                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
3015                    emitEmptyOrdered);
3016 }
3017 
3018 static std::pair<LValue, LValue>
3019 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3020                                      const OMPExecutableDirective &S) {
3021   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3022   LValue LB =
3023       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3024   LValue UB =
3025       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3026 
3027   // When composing 'distribute' with 'for' (e.g. as in 'distribute
3028   // parallel for') we need to use the 'distribute'
3029   // chunk lower and upper bounds rather than the whole loop iteration
3030   // space. These are parameters to the outlined function for 'parallel'
3031   // and we copy the bounds of the previous schedule into the
3032   // the current ones.
3033   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
3034   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
3035   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3036       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
3037   PrevLBVal = CGF.EmitScalarConversion(
3038       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
3039       LS.getIterationVariable()->getType(),
3040       LS.getPrevLowerBoundVariable()->getExprLoc());
3041   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3042       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
3043   PrevUBVal = CGF.EmitScalarConversion(
3044       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
3045       LS.getIterationVariable()->getType(),
3046       LS.getPrevUpperBoundVariable()->getExprLoc());
3047 
3048   CGF.EmitStoreOfScalar(PrevLBVal, LB);
3049   CGF.EmitStoreOfScalar(PrevUBVal, UB);
3050 
3051   return {LB, UB};
3052 }
3053 
3054 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3055 /// we need to use the LB and UB expressions generated by the worksharing
3056 /// code generation support, whereas in non combined situations we would
3057 /// just emit 0 and the LastIteration expression
3058 /// This function is necessary due to the difference of the LB and UB
3059 /// types for the RT emission routines for 'for_static_init' and
3060 /// 'for_dispatch_init'
3061 static std::pair<llvm::Value *, llvm::Value *>
3062 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3063                                         const OMPExecutableDirective &S,
3064                                         Address LB, Address UB) {
3065   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3066   const Expr *IVExpr = LS.getIterationVariable();
3067   // when implementing a dynamic schedule for a 'for' combined with a
3068   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3069   // is not normalized as each team only executes its own assigned
3070   // distribute chunk
3071   QualType IteratorTy = IVExpr->getType();
3072   llvm::Value *LBVal =
3073       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3074   llvm::Value *UBVal =
3075       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3076   return {LBVal, UBVal};
3077 }
3078 
3079 static void emitDistributeParallelForDistributeInnerBoundParams(
3080     CodeGenFunction &CGF, const OMPExecutableDirective &S,
3081     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3082   const auto &Dir = cast<OMPLoopDirective>(S);
3083   LValue LB =
3084       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3085   llvm::Value *LBCast =
3086       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
3087                                 CGF.SizeTy, /*isSigned=*/false);
3088   CapturedVars.push_back(LBCast);
3089   LValue UB =
3090       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3091 
3092   llvm::Value *UBCast =
3093       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
3094                                 CGF.SizeTy, /*isSigned=*/false);
3095   CapturedVars.push_back(UBCast);
3096 }
3097 
3098 static void
3099 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3100                                  const OMPLoopDirective &S,
3101                                  CodeGenFunction::JumpDest LoopExit) {
3102   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3103                                          PrePostActionTy &Action) {
3104     Action.Enter(CGF);
3105     bool HasCancel = false;
3106     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
3107       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
3108         HasCancel = D->hasCancel();
3109       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
3110         HasCancel = D->hasCancel();
3111       else if (const auto *D =
3112                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
3113         HasCancel = D->hasCancel();
3114     }
3115     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3116                                                      HasCancel);
3117     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
3118                                emitDistributeParallelForInnerBounds,
3119                                emitDistributeParallelForDispatchBounds);
3120   };
3121 
3122   emitCommonOMPParallelDirective(
3123       CGF, S,
3124       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
3125       CGInlinedWorksharingLoop,
3126       emitDistributeParallelForDistributeInnerBoundParams);
3127 }
3128 
3129 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3130     const OMPDistributeParallelForDirective &S) {
3131   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3132     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3133                               S.getDistInc());
3134   };
3135   OMPLexicalScope Scope(*this, S, OMPD_parallel);
3136   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3137 }
3138 
3139 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3140     const OMPDistributeParallelForSimdDirective &S) {
3141   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3142     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3143                               S.getDistInc());
3144   };
3145   OMPLexicalScope Scope(*this, S, OMPD_parallel);
3146   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3147 }
3148 
3149 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3150     const OMPDistributeSimdDirective &S) {
3151   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3152     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3153   };
3154   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3155   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3156 }
3157 
3158 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3159     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3160   // Emit SPMD target parallel for region as a standalone region.
3161   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3162     emitOMPSimdRegion(CGF, S, Action);
3163   };
3164   llvm::Function *Fn;
3165   llvm::Constant *Addr;
3166   // Emit target region as a standalone region.
3167   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3168       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3169   assert(Fn && Addr && "Target device function emission failed.");
3170 }
3171 
3172 void CodeGenFunction::EmitOMPTargetSimdDirective(
3173     const OMPTargetSimdDirective &S) {
3174   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3175     emitOMPSimdRegion(CGF, S, Action);
3176   };
3177   emitCommonOMPTargetDirective(*this, S, CodeGen);
3178 }
3179 
3180 namespace {
3181 struct ScheduleKindModifiersTy {
3182   OpenMPScheduleClauseKind Kind;
3183   OpenMPScheduleClauseModifier M1;
3184   OpenMPScheduleClauseModifier M2;
3185   ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3186                           OpenMPScheduleClauseModifier M1,
3187                           OpenMPScheduleClauseModifier M2)
3188       : Kind(Kind), M1(M1), M2(M2) {}
3189 };
3190 } // namespace
3191 
3192 bool CodeGenFunction::EmitOMPWorksharingLoop(
3193     const OMPLoopDirective &S, Expr *EUB,
3194     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3195     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3196   // Emit the loop iteration variable.
3197   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3198   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3199   EmitVarDecl(*IVDecl);
3200 
3201   // Emit the iterations count variable.
3202   // If it is not a variable, Sema decided to calculate iterations count on each
3203   // iteration (e.g., it is foldable into a constant).
3204   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3205     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3206     // Emit calculation of the iterations count.
3207     EmitIgnoredExpr(S.getCalcLastIteration());
3208   }
3209 
3210   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3211 
3212   bool HasLastprivateClause;
3213   // Check pre-condition.
3214   {
3215     OMPLoopScope PreInitScope(*this, S);
3216     // Skip the entire loop if we don't meet the precondition.
3217     // If the condition constant folds and can be elided, avoid emitting the
3218     // whole loop.
3219     bool CondConstant;
3220     llvm::BasicBlock *ContBlock = nullptr;
3221     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3222       if (!CondConstant)
3223         return false;
3224     } else {
3225       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3226       ContBlock = createBasicBlock("omp.precond.end");
3227       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3228                   getProfileCount(&S));
3229       EmitBlock(ThenBlock);
3230       incrementProfileCounter(&S);
3231     }
3232 
3233     RunCleanupsScope DoacrossCleanupScope(*this);
3234     bool Ordered = false;
3235     if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3236       if (OrderedClause->getNumForLoops())
3237         RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3238       else
3239         Ordered = true;
3240     }
3241 
3242     llvm::DenseSet<const Expr *> EmittedFinals;
3243     emitAlignedClause(*this, S);
3244     bool HasLinears = EmitOMPLinearClauseInit(S);
3245     // Emit helper vars inits.
3246 
3247     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3248     LValue LB = Bounds.first;
3249     LValue UB = Bounds.second;
3250     LValue ST =
3251         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3252     LValue IL =
3253         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3254 
3255     // Emit 'then' code.
3256     {
3257       OMPPrivateScope LoopScope(*this);
3258       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3259         // Emit implicit barrier to synchronize threads and avoid data races on
3260         // initialization of firstprivate variables and post-update of
3261         // lastprivate variables.
3262         CGM.getOpenMPRuntime().emitBarrierCall(
3263             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3264             /*ForceSimpleCall=*/true);
3265       }
3266       EmitOMPPrivateClause(S, LoopScope);
3267       CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3268           *this, S, EmitLValue(S.getIterationVariable()));
3269       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3270       EmitOMPReductionClauseInit(S, LoopScope);
3271       EmitOMPPrivateLoopCounters(S, LoopScope);
3272       EmitOMPLinearClause(S, LoopScope);
3273       (void)LoopScope.Privatize();
3274       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3275         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3276 
3277       // Detect the loop schedule kind and chunk.
3278       const Expr *ChunkExpr = nullptr;
3279       OpenMPScheduleTy ScheduleKind;
3280       if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3281         ScheduleKind.Schedule = C->getScheduleKind();
3282         ScheduleKind.M1 = C->getFirstScheduleModifier();
3283         ScheduleKind.M2 = C->getSecondScheduleModifier();
3284         ChunkExpr = C->getChunkSize();
3285       } else {
3286         // Default behaviour for schedule clause.
3287         CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3288             *this, S, ScheduleKind.Schedule, ChunkExpr);
3289       }
3290       bool HasChunkSizeOne = false;
3291       llvm::Value *Chunk = nullptr;
3292       if (ChunkExpr) {
3293         Chunk = EmitScalarExpr(ChunkExpr);
3294         Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3295                                      S.getIterationVariable()->getType(),
3296                                      S.getBeginLoc());
3297         Expr::EvalResult Result;
3298         if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3299           llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3300           HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3301         }
3302       }
3303       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3304       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3305       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3306       // If the static schedule kind is specified or if the ordered clause is
3307       // specified, and if no monotonic modifier is specified, the effect will
3308       // be as if the monotonic modifier was specified.
3309       bool StaticChunkedOne =
3310           RT.isStaticChunked(ScheduleKind.Schedule,
3311                              /* Chunked */ Chunk != nullptr) &&
3312           HasChunkSizeOne &&
3313           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3314       bool IsMonotonic =
3315           Ordered ||
3316           (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3317            !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3318              ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3319           ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3320           ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3321       if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3322                                  /* Chunked */ Chunk != nullptr) ||
3323            StaticChunkedOne) &&
3324           !Ordered) {
3325         JumpDest LoopExit =
3326             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3327         emitCommonSimdLoop(
3328             *this, S,
3329             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3330               if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3331                 CGF.EmitOMPSimdInit(S);
3332               } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3333                 if (C->getKind() == OMPC_ORDER_concurrent)
3334                   CGF.LoopStack.setParallel(/*Enable=*/true);
3335               }
3336             },
3337             [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3338              &S, ScheduleKind, LoopExit,
3339              &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3340               // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3341               // When no chunk_size is specified, the iteration space is divided
3342               // into chunks that are approximately equal in size, and at most
3343               // one chunk is distributed to each thread. Note that the size of
3344               // the chunks is unspecified in this case.
3345               CGOpenMPRuntime::StaticRTInput StaticInit(
3346                   IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3347                   LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3348                   StaticChunkedOne ? Chunk : nullptr);
3349               CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3350                   CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3351                   StaticInit);
3352               // UB = min(UB, GlobalUB);
3353               if (!StaticChunkedOne)
3354                 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3355               // IV = LB;
3356               CGF.EmitIgnoredExpr(S.getInit());
3357               // For unchunked static schedule generate:
3358               //
3359               // while (idx <= UB) {
3360               //   BODY;
3361               //   ++idx;
3362               // }
3363               //
3364               // For static schedule with chunk one:
3365               //
3366               // while (IV <= PrevUB) {
3367               //   BODY;
3368               //   IV += ST;
3369               // }
3370               CGF.EmitOMPInnerLoop(
3371                   S, LoopScope.requiresCleanups(),
3372                   StaticChunkedOne ? S.getCombinedParForInDistCond()
3373                                    : S.getCond(),
3374                   StaticChunkedOne ? S.getDistInc() : S.getInc(),
3375                   [&S, LoopExit](CodeGenFunction &CGF) {
3376                     emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3377                   },
3378                   [](CodeGenFunction &) {});
3379             });
3380         EmitBlock(LoopExit.getBlock());
3381         // Tell the runtime we are done.
3382         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3383           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3384                                                          S.getDirectiveKind());
3385         };
3386         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3387       } else {
3388         // Emit the outer loop, which requests its work chunk [LB..UB] from
3389         // runtime and runs the inner loop to process it.
3390         const OMPLoopArguments LoopArguments(
3391             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3392             IL.getAddress(*this), Chunk, EUB);
3393         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3394                             LoopArguments, CGDispatchBounds);
3395       }
3396       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3397         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3398           return CGF.Builder.CreateIsNotNull(
3399               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3400         });
3401       }
3402       EmitOMPReductionClauseFinal(
3403           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3404                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3405                  : /*Parallel only*/ OMPD_parallel);
3406       // Emit post-update of the reduction variables if IsLastIter != 0.
3407       emitPostUpdateForReductionClause(
3408           *this, S, [IL, &S](CodeGenFunction &CGF) {
3409             return CGF.Builder.CreateIsNotNull(
3410                 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3411           });
3412       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3413       if (HasLastprivateClause)
3414         EmitOMPLastprivateClauseFinal(
3415             S, isOpenMPSimdDirective(S.getDirectiveKind()),
3416             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3417     }
3418     EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3419       return CGF.Builder.CreateIsNotNull(
3420           CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3421     });
3422     DoacrossCleanupScope.ForceCleanup();
3423     // We're now done with the loop, so jump to the continuation block.
3424     if (ContBlock) {
3425       EmitBranch(ContBlock);
3426       EmitBlock(ContBlock, /*IsFinished=*/true);
3427     }
3428   }
3429   return HasLastprivateClause;
3430 }
3431 
3432 /// The following two functions generate expressions for the loop lower
3433 /// and upper bounds in case of static and dynamic (dispatch) schedule
3434 /// of the associated 'for' or 'distribute' loop.
3435 static std::pair<LValue, LValue>
3436 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3437   const auto &LS = cast<OMPLoopDirective>(S);
3438   LValue LB =
3439       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3440   LValue UB =
3441       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3442   return {LB, UB};
3443 }
3444 
3445 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3446 /// consider the lower and upper bound expressions generated by the
3447 /// worksharing loop support, but we use 0 and the iteration space size as
3448 /// constants
3449 static std::pair<llvm::Value *, llvm::Value *>
3450 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3451                           Address LB, Address UB) {
3452   const auto &LS = cast<OMPLoopDirective>(S);
3453   const Expr *IVExpr = LS.getIterationVariable();
3454   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3455   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3456   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3457   return {LBVal, UBVal};
3458 }
3459 
3460 /// Emits internal temp array declarations for the directive with inscan
3461 /// reductions.
3462 /// The code is the following:
3463 /// \code
3464 /// size num_iters = <num_iters>;
3465 /// <type> buffer[num_iters];
3466 /// \endcode
3467 static void emitScanBasedDirectiveDecls(
3468     CodeGenFunction &CGF, const OMPLoopDirective &S,
3469     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3470   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3471       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3472   SmallVector<const Expr *, 4> Shareds;
3473   SmallVector<const Expr *, 4> Privates;
3474   SmallVector<const Expr *, 4> ReductionOps;
3475   SmallVector<const Expr *, 4> CopyArrayTemps;
3476   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3477     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3478            "Only inscan reductions are expected.");
3479     Shareds.append(C->varlist_begin(), C->varlist_end());
3480     Privates.append(C->privates().begin(), C->privates().end());
3481     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3482     CopyArrayTemps.append(C->copy_array_temps().begin(),
3483                           C->copy_array_temps().end());
3484   }
3485   {
3486     // Emit buffers for each reduction variables.
3487     // ReductionCodeGen is required to emit correctly the code for array
3488     // reductions.
3489     ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3490     unsigned Count = 0;
3491     auto *ITA = CopyArrayTemps.begin();
3492     for (const Expr *IRef : Privates) {
3493       const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3494       // Emit variably modified arrays, used for arrays/array sections
3495       // reductions.
3496       if (PrivateVD->getType()->isVariablyModifiedType()) {
3497         RedCG.emitSharedOrigLValue(CGF, Count);
3498         RedCG.emitAggregateType(CGF, Count);
3499       }
3500       CodeGenFunction::OpaqueValueMapping DimMapping(
3501           CGF,
3502           cast<OpaqueValueExpr>(
3503               cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3504                   ->getSizeExpr()),
3505           RValue::get(OMPScanNumIterations));
3506       // Emit temp buffer.
3507       CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3508       ++ITA;
3509       ++Count;
3510     }
3511   }
3512 }
3513 
3514 /// Emits the code for the directive with inscan reductions.
3515 /// The code is the following:
3516 /// \code
3517 /// #pragma omp ...
3518 /// for (i: 0..<num_iters>) {
3519 ///   <input phase>;
3520 ///   buffer[i] = red;
3521 /// }
3522 /// #pragma omp master // in parallel region
3523 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3524 /// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3525 ///   buffer[i] op= buffer[i-pow(2,k)];
3526 /// #pragma omp barrier // in parallel region
3527 /// #pragma omp ...
3528 /// for (0..<num_iters>) {
3529 ///   red = InclusiveScan ? buffer[i] : buffer[i-1];
3530 ///   <scan phase>;
3531 /// }
3532 /// \endcode
3533 static void emitScanBasedDirective(
3534     CodeGenFunction &CGF, const OMPLoopDirective &S,
3535     llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3536     llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3537     llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3538   llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3539       NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3540   SmallVector<const Expr *, 4> Privates;
3541   SmallVector<const Expr *, 4> ReductionOps;
3542   SmallVector<const Expr *, 4> LHSs;
3543   SmallVector<const Expr *, 4> RHSs;
3544   SmallVector<const Expr *, 4> CopyArrayElems;
3545   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3546     assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3547            "Only inscan reductions are expected.");
3548     Privates.append(C->privates().begin(), C->privates().end());
3549     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3550     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3551     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3552     CopyArrayElems.append(C->copy_array_elems().begin(),
3553                           C->copy_array_elems().end());
3554   }
3555   CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3556   {
3557     // Emit loop with input phase:
3558     // #pragma omp ...
3559     // for (i: 0..<num_iters>) {
3560     //   <input phase>;
3561     //   buffer[i] = red;
3562     // }
3563     CGF.OMPFirstScanLoop = true;
3564     CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3565     FirstGen(CGF);
3566   }
3567   // #pragma omp barrier // in parallel region
3568   auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3569                     &ReductionOps,
3570                     &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3571     Action.Enter(CGF);
3572     // Emit prefix reduction:
3573     // #pragma omp master // in parallel region
3574     // for (int k = 0; k <= ceil(log2(n)); ++k)
3575     llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3576     llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3577     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3578     llvm::Function *F =
3579         CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3580     llvm::Value *Arg =
3581         CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3582     llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3583     F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3584     LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3585     LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3586     llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3587         OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3588     auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3589     CGF.EmitBlock(LoopBB);
3590     auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3591     // size pow2k = 1;
3592     auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3593     Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3594     Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3595     // for (size i = n - 1; i >= 2 ^ k; --i)
3596     //   tmp[i] op= tmp[i-pow2k];
3597     llvm::BasicBlock *InnerLoopBB =
3598         CGF.createBasicBlock("omp.inner.log.scan.body");
3599     llvm::BasicBlock *InnerExitBB =
3600         CGF.createBasicBlock("omp.inner.log.scan.exit");
3601     llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3602     CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3603     CGF.EmitBlock(InnerLoopBB);
3604     auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3605     IVal->addIncoming(NMin1, LoopBB);
3606     {
3607       CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3608       auto *ILHS = LHSs.begin();
3609       auto *IRHS = RHSs.begin();
3610       for (const Expr *CopyArrayElem : CopyArrayElems) {
3611         const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3612         const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3613         Address LHSAddr = Address::invalid();
3614         {
3615           CodeGenFunction::OpaqueValueMapping IdxMapping(
3616               CGF,
3617               cast<OpaqueValueExpr>(
3618                   cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3619               RValue::get(IVal));
3620           LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3621         }
3622         PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3623         Address RHSAddr = Address::invalid();
3624         {
3625           llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3626           CodeGenFunction::OpaqueValueMapping IdxMapping(
3627               CGF,
3628               cast<OpaqueValueExpr>(
3629                   cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3630               RValue::get(OffsetIVal));
3631           RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3632         }
3633         PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3634         ++ILHS;
3635         ++IRHS;
3636       }
3637       PrivScope.Privatize();
3638       CGF.CGM.getOpenMPRuntime().emitReduction(
3639           CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3640           {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3641     }
3642     llvm::Value *NextIVal =
3643         CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3644     IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3645     CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3646     CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3647     CGF.EmitBlock(InnerExitBB);
3648     llvm::Value *Next =
3649         CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3650     Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3651     // pow2k <<= 1;
3652     llvm::Value *NextPow2K =
3653         CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3654     Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3655     llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3656     CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3657     auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3658     CGF.EmitBlock(ExitBB);
3659   };
3660   if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3661     CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3662     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3663         CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3664         /*ForceSimpleCall=*/true);
3665   } else {
3666     RegionCodeGenTy RCG(CodeGen);
3667     RCG(CGF);
3668   }
3669 
3670   CGF.OMPFirstScanLoop = false;
3671   SecondGen(CGF);
3672 }
3673 
3674 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3675                                      const OMPLoopDirective &S,
3676                                      bool HasCancel) {
3677   bool HasLastprivates;
3678   if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3679                    [](const OMPReductionClause *C) {
3680                      return C->getModifier() == OMPC_REDUCTION_inscan;
3681                    })) {
3682     const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3683       CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3684       OMPLoopScope LoopScope(CGF, S);
3685       return CGF.EmitScalarExpr(S.getNumIterations());
3686     };
3687     const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3688       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3689           CGF, S.getDirectiveKind(), HasCancel);
3690       (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3691                                        emitForLoopBounds,
3692                                        emitDispatchForLoopBounds);
3693       // Emit an implicit barrier at the end.
3694       CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3695                                                  OMPD_for);
3696     };
3697     const auto &&SecondGen = [&S, HasCancel,
3698                               &HasLastprivates](CodeGenFunction &CGF) {
3699       CodeGenFunction::OMPCancelStackRAII CancelRegion(
3700           CGF, S.getDirectiveKind(), HasCancel);
3701       HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3702                                                    emitForLoopBounds,
3703                                                    emitDispatchForLoopBounds);
3704     };
3705     if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3706       emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3707     emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3708   } else {
3709     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3710                                                      HasCancel);
3711     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3712                                                  emitForLoopBounds,
3713                                                  emitDispatchForLoopBounds);
3714   }
3715   return HasLastprivates;
3716 }
3717 
3718 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3719   if (S.hasCancel())
3720     return false;
3721   for (OMPClause *C : S.clauses()) {
3722     if (isa<OMPNowaitClause>(C))
3723       continue;
3724 
3725     if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
3726       if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3727         return false;
3728       if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3729         return false;
3730       switch (SC->getScheduleKind()) {
3731       case OMPC_SCHEDULE_auto:
3732       case OMPC_SCHEDULE_dynamic:
3733       case OMPC_SCHEDULE_runtime:
3734       case OMPC_SCHEDULE_guided:
3735       case OMPC_SCHEDULE_static:
3736         continue;
3737       case OMPC_SCHEDULE_unknown:
3738         return false;
3739       }
3740     }
3741 
3742     return false;
3743   }
3744 
3745   return true;
3746 }
3747 
3748 static llvm::omp::ScheduleKind
3749 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
3750   switch (ScheduleClauseKind) {
3751   case OMPC_SCHEDULE_unknown:
3752     return llvm::omp::OMP_SCHEDULE_Default;
3753   case OMPC_SCHEDULE_auto:
3754     return llvm::omp::OMP_SCHEDULE_Auto;
3755   case OMPC_SCHEDULE_dynamic:
3756     return llvm::omp::OMP_SCHEDULE_Dynamic;
3757   case OMPC_SCHEDULE_guided:
3758     return llvm::omp::OMP_SCHEDULE_Guided;
3759   case OMPC_SCHEDULE_runtime:
3760     return llvm::omp::OMP_SCHEDULE_Runtime;
3761   case OMPC_SCHEDULE_static:
3762     return llvm::omp::OMP_SCHEDULE_Static;
3763   }
3764   llvm_unreachable("Unhandled schedule kind");
3765 }
3766 
3767 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3768   bool HasLastprivates = false;
3769   bool UseOMPIRBuilder =
3770       CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
3771   auto &&CodeGen = [this, &S, &HasLastprivates,
3772                     UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3773     // Use the OpenMPIRBuilder if enabled.
3774     if (UseOMPIRBuilder) {
3775       bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3776 
3777       llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
3778       llvm::Value *ChunkSize = nullptr;
3779       if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
3780         SchedKind =
3781             convertClauseKindToSchedKind(SchedClause->getScheduleKind());
3782         if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
3783           ChunkSize = EmitScalarExpr(ChunkSizeExpr);
3784       }
3785 
3786       // Emit the associated statement and get its loop representation.
3787       const Stmt *Inner = S.getRawStmt();
3788       llvm::CanonicalLoopInfo *CLI =
3789           EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3790 
3791       llvm::OpenMPIRBuilder &OMPBuilder =
3792           CGM.getOpenMPRuntime().getOMPBuilder();
3793       llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3794           AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3795       OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI,
3796                                     AllocaIP, NeedsBarrier, SchedKind,
3797                                     ChunkSize);
3798       return;
3799     }
3800 
3801     HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3802   };
3803   {
3804     auto LPCRegion =
3805         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3806     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3807     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3808                                                 S.hasCancel());
3809   }
3810 
3811   if (!UseOMPIRBuilder) {
3812     // Emit an implicit barrier at the end.
3813     if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3814       CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3815   }
3816   // Check for outer lastprivate conditional update.
3817   checkForLastprivateConditionalUpdate(*this, S);
3818 }
3819 
3820 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3821   bool HasLastprivates = false;
3822   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3823                                           PrePostActionTy &) {
3824     HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3825   };
3826   {
3827     auto LPCRegion =
3828         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3829     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3830     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3831   }
3832 
3833   // Emit an implicit barrier at the end.
3834   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3835     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3836   // Check for outer lastprivate conditional update.
3837   checkForLastprivateConditionalUpdate(*this, S);
3838 }
3839 
3840 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3841                                 const Twine &Name,
3842                                 llvm::Value *Init = nullptr) {
3843   LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3844   if (Init)
3845     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3846   return LVal;
3847 }
3848 
3849 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3850   const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3851   const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3852   bool HasLastprivates = false;
3853   auto &&CodeGen = [&S, CapturedStmt, CS,
3854                     &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3855     const ASTContext &C = CGF.getContext();
3856     QualType KmpInt32Ty =
3857         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3858     // Emit helper vars inits.
3859     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3860                                   CGF.Builder.getInt32(0));
3861     llvm::ConstantInt *GlobalUBVal = CS != nullptr
3862                                          ? CGF.Builder.getInt32(CS->size() - 1)
3863                                          : CGF.Builder.getInt32(0);
3864     LValue UB =
3865         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3866     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3867                                   CGF.Builder.getInt32(1));
3868     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3869                                   CGF.Builder.getInt32(0));
3870     // Loop counter.
3871     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3872     OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3873     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3874     OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3875     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3876     // Generate condition for loop.
3877     BinaryOperator *Cond = BinaryOperator::Create(
3878         C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
3879         S.getBeginLoc(), FPOptionsOverride());
3880     // Increment for loop counter.
3881     UnaryOperator *Inc = UnaryOperator::Create(
3882         C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
3883         S.getBeginLoc(), true, FPOptionsOverride());
3884     auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3885       // Iterate through all sections and emit a switch construct:
3886       // switch (IV) {
3887       //   case 0:
3888       //     <SectionStmt[0]>;
3889       //     break;
3890       // ...
3891       //   case <NumSection> - 1:
3892       //     <SectionStmt[<NumSection> - 1]>;
3893       //     break;
3894       // }
3895       // .omp.sections.exit:
3896       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
3897       llvm::SwitchInst *SwitchStmt =
3898           CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
3899                                    ExitBB, CS == nullptr ? 1 : CS->size());
3900       if (CS) {
3901         unsigned CaseNumber = 0;
3902         for (const Stmt *SubStmt : CS->children()) {
3903           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
3904           CGF.EmitBlock(CaseBB);
3905           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
3906           CGF.EmitStmt(SubStmt);
3907           CGF.EmitBranch(ExitBB);
3908           ++CaseNumber;
3909         }
3910       } else {
3911         llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
3912         CGF.EmitBlock(CaseBB);
3913         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
3914         CGF.EmitStmt(CapturedStmt);
3915         CGF.EmitBranch(ExitBB);
3916       }
3917       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
3918     };
3919 
3920     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3921     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
3922       // Emit implicit barrier to synchronize threads and avoid data races on
3923       // initialization of firstprivate variables and post-update of lastprivate
3924       // variables.
3925       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3926           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3927           /*ForceSimpleCall=*/true);
3928     }
3929     CGF.EmitOMPPrivateClause(S, LoopScope);
3930     CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
3931     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3932     CGF.EmitOMPReductionClauseInit(S, LoopScope);
3933     (void)LoopScope.Privatize();
3934     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3935       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
3936 
3937     // Emit static non-chunked loop.
3938     OpenMPScheduleTy ScheduleKind;
3939     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3940     CGOpenMPRuntime::StaticRTInput StaticInit(
3941         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3942         LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3943     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3944         CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3945     // UB = min(UB, GlobalUB);
3946     llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3947     llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3948         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3949     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3950     // IV = LB;
3951     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3952     // while (idx <= UB) { BODY; ++idx; }
3953     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3954                          [](CodeGenFunction &) {});
3955     // Tell the runtime we are done.
3956     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3957       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3958                                                      S.getDirectiveKind());
3959     };
3960     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3961     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3962     // Emit post-update of the reduction variables if IsLastIter != 0.
3963     emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3964       return CGF.Builder.CreateIsNotNull(
3965           CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3966     });
3967 
3968     // Emit final copy of the lastprivate variables if IsLastIter != 0.
3969     if (HasLastprivates)
3970       CGF.EmitOMPLastprivateClauseFinal(
3971           S, /*NoFinals=*/false,
3972           CGF.Builder.CreateIsNotNull(
3973               CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3974   };
3975 
3976   bool HasCancel = false;
3977   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3978     HasCancel = OSD->hasCancel();
3979   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3980     HasCancel = OPSD->hasCancel();
3981   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3982   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3983                                               HasCancel);
3984   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
3985   // clause. Otherwise the barrier will be generated by the codegen for the
3986   // directive.
3987   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
3988     // Emit implicit barrier to synchronize threads and avoid data races on
3989     // initialization of firstprivate variables.
3990     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3991                                            OMPD_unknown);
3992   }
3993 }
3994 
3995 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3996   if (CGM.getLangOpts().OpenMPIRBuilder) {
3997     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3998     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3999     using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4000 
4001     auto FiniCB = [this](InsertPointTy IP) {
4002       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4003     };
4004 
4005     const CapturedStmt *ICS = S.getInnermostCapturedStmt();
4006     const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4007     const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
4008     llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4009     if (CS) {
4010       for (const Stmt *SubStmt : CS->children()) {
4011         auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
4012                                          InsertPointTy CodeGenIP,
4013                                          llvm::BasicBlock &FiniBB) {
4014           OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
4015                                                          FiniBB);
4016           OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
4017                                                  FiniBB);
4018         };
4019         SectionCBVector.push_back(SectionCB);
4020       }
4021     } else {
4022       auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
4023                                             InsertPointTy CodeGenIP,
4024                                             llvm::BasicBlock &FiniBB) {
4025         OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4026         OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
4027                                                FiniBB);
4028       };
4029       SectionCBVector.push_back(SectionCB);
4030     }
4031 
4032     // Privatization callback that performs appropriate action for
4033     // shared/private/firstprivate/lastprivate/copyin/... variables.
4034     //
4035     // TODO: This defaults to shared right now.
4036     auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4037                      llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4038       // The next line is appropriate only for variables (Val) with the
4039       // data-sharing attribute "shared".
4040       ReplVal = &Val;
4041 
4042       return CodeGenIP;
4043     };
4044 
4045     CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
4046     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
4047     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4048         AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
4049     Builder.restoreIP(OMPBuilder.createSections(
4050         Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
4051         S.getSingleClause<OMPNowaitClause>()));
4052     return;
4053   }
4054   {
4055     auto LPCRegion =
4056         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4057     OMPLexicalScope Scope(*this, S, OMPD_unknown);
4058     EmitSections(S);
4059   }
4060   // Emit an implicit barrier at the end.
4061   if (!S.getSingleClause<OMPNowaitClause>()) {
4062     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4063                                            OMPD_sections);
4064   }
4065   // Check for outer lastprivate conditional update.
4066   checkForLastprivateConditionalUpdate(*this, S);
4067 }
4068 
4069 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4070   if (CGM.getLangOpts().OpenMPIRBuilder) {
4071     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4072     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4073 
4074     const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4075     auto FiniCB = [this](InsertPointTy IP) {
4076       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4077     };
4078 
4079     auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4080                                                    InsertPointTy CodeGenIP,
4081                                                    llvm::BasicBlock &FiniBB) {
4082       OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4083       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
4084                                              CodeGenIP, FiniBB);
4085     };
4086 
4087     LexicalScope Scope(*this, S.getSourceRange());
4088     EmitStopPoint(&S);
4089     Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
4090 
4091     return;
4092   }
4093   LexicalScope Scope(*this, S.getSourceRange());
4094   EmitStopPoint(&S);
4095   EmitStmt(S.getAssociatedStmt());
4096 }
4097 
4098 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4099   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4100   llvm::SmallVector<const Expr *, 8> DestExprs;
4101   llvm::SmallVector<const Expr *, 8> SrcExprs;
4102   llvm::SmallVector<const Expr *, 8> AssignmentOps;
4103   // Check if there are any 'copyprivate' clauses associated with this
4104   // 'single' construct.
4105   // Build a list of copyprivate variables along with helper expressions
4106   // (<source>, <destination>, <destination>=<source> expressions)
4107   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4108     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
4109     DestExprs.append(C->destination_exprs().begin(),
4110                      C->destination_exprs().end());
4111     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
4112     AssignmentOps.append(C->assignment_ops().begin(),
4113                          C->assignment_ops().end());
4114   }
4115   // Emit code for 'single' region along with 'copyprivate' clauses
4116   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4117     Action.Enter(CGF);
4118     OMPPrivateScope SingleScope(CGF);
4119     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4120     CGF.EmitOMPPrivateClause(S, SingleScope);
4121     (void)SingleScope.Privatize();
4122     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4123   };
4124   {
4125     auto LPCRegion =
4126         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4127     OMPLexicalScope Scope(*this, S, OMPD_unknown);
4128     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4129                                             CopyprivateVars, DestExprs,
4130                                             SrcExprs, AssignmentOps);
4131   }
4132   // Emit an implicit barrier at the end (to avoid data race on firstprivate
4133   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4134   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4135     CGM.getOpenMPRuntime().emitBarrierCall(
4136         *this, S.getBeginLoc(),
4137         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4138   }
4139   // Check for outer lastprivate conditional update.
4140   checkForLastprivateConditionalUpdate(*this, S);
4141 }
4142 
4143 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4144   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4145     Action.Enter(CGF);
4146     CGF.EmitStmt(S.getRawStmt());
4147   };
4148   CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4149 }
4150 
4151 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4152   if (CGM.getLangOpts().OpenMPIRBuilder) {
4153     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4154     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4155 
4156     const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4157 
4158     auto FiniCB = [this](InsertPointTy IP) {
4159       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4160     };
4161 
4162     auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4163                                                   InsertPointTy CodeGenIP,
4164                                                   llvm::BasicBlock &FiniBB) {
4165       OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4166       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
4167                                              CodeGenIP, FiniBB);
4168     };
4169 
4170     LexicalScope Scope(*this, S.getSourceRange());
4171     EmitStopPoint(&S);
4172     Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
4173 
4174     return;
4175   }
4176   LexicalScope Scope(*this, S.getSourceRange());
4177   EmitStopPoint(&S);
4178   emitMaster(*this, S);
4179 }
4180 
4181 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4182   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4183     Action.Enter(CGF);
4184     CGF.EmitStmt(S.getRawStmt());
4185   };
4186   Expr *Filter = nullptr;
4187   if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4188     Filter = FilterClause->getThreadID();
4189   CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4190                                               Filter);
4191 }
4192 
4193 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4194   if (CGM.getLangOpts().OpenMPIRBuilder) {
4195     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4196     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4197 
4198     const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4199     const Expr *Filter = nullptr;
4200     if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4201       Filter = FilterClause->getThreadID();
4202     llvm::Value *FilterVal = Filter
4203                                  ? EmitScalarExpr(Filter, CGM.Int32Ty)
4204                                  : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4205 
4206     auto FiniCB = [this](InsertPointTy IP) {
4207       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4208     };
4209 
4210     auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4211                                                   InsertPointTy CodeGenIP,
4212                                                   llvm::BasicBlock &FiniBB) {
4213       OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4214       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
4215                                              CodeGenIP, FiniBB);
4216     };
4217 
4218     LexicalScope Scope(*this, S.getSourceRange());
4219     EmitStopPoint(&S);
4220     Builder.restoreIP(
4221         OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4222 
4223     return;
4224   }
4225   LexicalScope Scope(*this, S.getSourceRange());
4226   EmitStopPoint(&S);
4227   emitMasked(*this, S);
4228 }
4229 
4230 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4231   if (CGM.getLangOpts().OpenMPIRBuilder) {
4232     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4233     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4234 
4235     const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4236     const Expr *Hint = nullptr;
4237     if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4238       Hint = HintClause->getHint();
4239 
4240     // TODO: This is slightly different from what's currently being done in
4241     // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4242     // about typing is final.
4243     llvm::Value *HintInst = nullptr;
4244     if (Hint)
4245       HintInst =
4246           Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4247 
4248     auto FiniCB = [this](InsertPointTy IP) {
4249       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4250     };
4251 
4252     auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4253                                                     InsertPointTy CodeGenIP,
4254                                                     llvm::BasicBlock &FiniBB) {
4255       OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4256       OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
4257                                              CodeGenIP, FiniBB);
4258     };
4259 
4260     LexicalScope Scope(*this, S.getSourceRange());
4261     EmitStopPoint(&S);
4262     Builder.restoreIP(OMPBuilder.createCritical(
4263         Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
4264         HintInst));
4265 
4266     return;
4267   }
4268 
4269   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4270     Action.Enter(CGF);
4271     CGF.EmitStmt(S.getAssociatedStmt());
4272   };
4273   const Expr *Hint = nullptr;
4274   if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4275     Hint = HintClause->getHint();
4276   LexicalScope Scope(*this, S.getSourceRange());
4277   EmitStopPoint(&S);
4278   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4279                                             S.getDirectiveName().getAsString(),
4280                                             CodeGen, S.getBeginLoc(), Hint);
4281 }
4282 
4283 void CodeGenFunction::EmitOMPParallelForDirective(
4284     const OMPParallelForDirective &S) {
4285   // Emit directive as a combined directive that consists of two implicit
4286   // directives: 'parallel' with 'for' directive.
4287   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4288     Action.Enter(CGF);
4289     (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4290   };
4291   {
4292     if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4293                      [](const OMPReductionClause *C) {
4294                        return C->getModifier() == OMPC_REDUCTION_inscan;
4295                      })) {
4296       const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4297         CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4298         CGCapturedStmtInfo CGSI(CR_OpenMP);
4299         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4300         OMPLoopScope LoopScope(CGF, S);
4301         return CGF.EmitScalarExpr(S.getNumIterations());
4302       };
4303       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4304     }
4305     auto LPCRegion =
4306         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4307     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4308                                    emitEmptyBoundParameters);
4309   }
4310   // Check for outer lastprivate conditional update.
4311   checkForLastprivateConditionalUpdate(*this, S);
4312 }
4313 
4314 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4315     const OMPParallelForSimdDirective &S) {
4316   // Emit directive as a combined directive that consists of two implicit
4317   // directives: 'parallel' with 'for' directive.
4318   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4319     Action.Enter(CGF);
4320     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4321   };
4322   {
4323     if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4324                      [](const OMPReductionClause *C) {
4325                        return C->getModifier() == OMPC_REDUCTION_inscan;
4326                      })) {
4327       const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4328         CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4329         CGCapturedStmtInfo CGSI(CR_OpenMP);
4330         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4331         OMPLoopScope LoopScope(CGF, S);
4332         return CGF.EmitScalarExpr(S.getNumIterations());
4333       };
4334       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4335     }
4336     auto LPCRegion =
4337         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4338     emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4339                                    emitEmptyBoundParameters);
4340   }
4341   // Check for outer lastprivate conditional update.
4342   checkForLastprivateConditionalUpdate(*this, S);
4343 }
4344 
4345 void CodeGenFunction::EmitOMPParallelMasterDirective(
4346     const OMPParallelMasterDirective &S) {
4347   // Emit directive as a combined directive that consists of two implicit
4348   // directives: 'parallel' with 'master' directive.
4349   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4350     Action.Enter(CGF);
4351     OMPPrivateScope PrivateScope(CGF);
4352     bool Copyins = CGF.EmitOMPCopyinClause(S);
4353     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4354     if (Copyins) {
4355       // Emit implicit barrier to synchronize threads and avoid data races on
4356       // propagation master's thread values of threadprivate variables to local
4357       // instances of that variables of all other implicit threads.
4358       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4359           CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4360           /*ForceSimpleCall=*/true);
4361     }
4362     CGF.EmitOMPPrivateClause(S, PrivateScope);
4363     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4364     (void)PrivateScope.Privatize();
4365     emitMaster(CGF, S);
4366     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4367   };
4368   {
4369     auto LPCRegion =
4370         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4371     emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4372                                    emitEmptyBoundParameters);
4373     emitPostUpdateForReductionClause(*this, S,
4374                                      [](CodeGenFunction &) { return nullptr; });
4375   }
4376   // Check for outer lastprivate conditional update.
4377   checkForLastprivateConditionalUpdate(*this, S);
4378 }
4379 
4380 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4381     const OMPParallelSectionsDirective &S) {
4382   // Emit directive as a combined directive that consists of two implicit
4383   // directives: 'parallel' with 'sections' directive.
4384   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4385     Action.Enter(CGF);
4386     CGF.EmitSections(S);
4387   };
4388   {
4389     auto LPCRegion =
4390         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4391     emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4392                                    emitEmptyBoundParameters);
4393   }
4394   // Check for outer lastprivate conditional update.
4395   checkForLastprivateConditionalUpdate(*this, S);
4396 }
4397 
4398 namespace {
4399 /// Get the list of variables declared in the context of the untied tasks.
4400 class CheckVarsEscapingUntiedTaskDeclContext final
4401     : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4402   llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4403 
4404 public:
4405   explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4406   virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4407   void VisitDeclStmt(const DeclStmt *S) {
4408     if (!S)
4409       return;
4410     // Need to privatize only local vars, static locals can be processed as is.
4411     for (const Decl *D : S->decls()) {
4412       if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4413         if (VD->hasLocalStorage())
4414           PrivateDecls.push_back(VD);
4415     }
4416   }
4417   void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4418   void VisitCapturedStmt(const CapturedStmt *) {}
4419   void VisitLambdaExpr(const LambdaExpr *) {}
4420   void VisitBlockExpr(const BlockExpr *) {}
4421   void VisitStmt(const Stmt *S) {
4422     if (!S)
4423       return;
4424     for (const Stmt *Child : S->children())
4425       if (Child)
4426         Visit(Child);
4427   }
4428 
4429   /// Swaps list of vars with the provided one.
4430   ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4431 };
4432 } // anonymous namespace
4433 
4434 void CodeGenFunction::EmitOMPTaskBasedDirective(
4435     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4436     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4437     OMPTaskDataTy &Data) {
4438   // Emit outlined function for task construct.
4439   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4440   auto I = CS->getCapturedDecl()->param_begin();
4441   auto PartId = std::next(I);
4442   auto TaskT = std::next(I, 4);
4443   // Check if the task is final
4444   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4445     // If the condition constant folds and can be elided, try to avoid emitting
4446     // the condition and the dead arm of the if/else.
4447     const Expr *Cond = Clause->getCondition();
4448     bool CondConstant;
4449     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4450       Data.Final.setInt(CondConstant);
4451     else
4452       Data.Final.setPointer(EvaluateExprAsBool(Cond));
4453   } else {
4454     // By default the task is not final.
4455     Data.Final.setInt(/*IntVal=*/false);
4456   }
4457   // Check if the task has 'priority' clause.
4458   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4459     const Expr *Prio = Clause->getPriority();
4460     Data.Priority.setInt(/*IntVal=*/true);
4461     Data.Priority.setPointer(EmitScalarConversion(
4462         EmitScalarExpr(Prio), Prio->getType(),
4463         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4464         Prio->getExprLoc()));
4465   }
4466   // The first function argument for tasks is a thread id, the second one is a
4467   // part id (0 for tied tasks, >=0 for untied task).
4468   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4469   // Get list of private variables.
4470   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4471     auto IRef = C->varlist_begin();
4472     for (const Expr *IInit : C->private_copies()) {
4473       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4474       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4475         Data.PrivateVars.push_back(*IRef);
4476         Data.PrivateCopies.push_back(IInit);
4477       }
4478       ++IRef;
4479     }
4480   }
4481   EmittedAsPrivate.clear();
4482   // Get list of firstprivate variables.
4483   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4484     auto IRef = C->varlist_begin();
4485     auto IElemInitRef = C->inits().begin();
4486     for (const Expr *IInit : C->private_copies()) {
4487       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4488       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4489         Data.FirstprivateVars.push_back(*IRef);
4490         Data.FirstprivateCopies.push_back(IInit);
4491         Data.FirstprivateInits.push_back(*IElemInitRef);
4492       }
4493       ++IRef;
4494       ++IElemInitRef;
4495     }
4496   }
4497   // Get list of lastprivate variables (for taskloops).
4498   llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4499   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4500     auto IRef = C->varlist_begin();
4501     auto ID = C->destination_exprs().begin();
4502     for (const Expr *IInit : C->private_copies()) {
4503       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4504       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4505         Data.LastprivateVars.push_back(*IRef);
4506         Data.LastprivateCopies.push_back(IInit);
4507       }
4508       LastprivateDstsOrigs.insert(
4509           std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4510                          cast<DeclRefExpr>(*IRef)));
4511       ++IRef;
4512       ++ID;
4513     }
4514   }
4515   SmallVector<const Expr *, 4> LHSs;
4516   SmallVector<const Expr *, 4> RHSs;
4517   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4518     Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4519     Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4520     Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4521     Data.ReductionOps.append(C->reduction_ops().begin(),
4522                              C->reduction_ops().end());
4523     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4524     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4525   }
4526   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4527       *this, S.getBeginLoc(), LHSs, RHSs, Data);
4528   // Build list of dependences.
4529   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4530     OMPTaskDataTy::DependData &DD =
4531         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4532     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4533   }
4534   // Get list of local vars for untied tasks.
4535   if (!Data.Tied) {
4536     CheckVarsEscapingUntiedTaskDeclContext Checker;
4537     Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4538     Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4539                               Checker.getPrivateDecls().end());
4540   }
4541   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4542                     CapturedRegion](CodeGenFunction &CGF,
4543                                     PrePostActionTy &Action) {
4544     llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4545                     std::pair<Address, Address>>
4546         UntiedLocalVars;
4547     // Set proper addresses for generated private copies.
4548     OMPPrivateScope Scope(CGF);
4549     // Generate debug info for variables present in shared clause.
4550     if (auto *DI = CGF.getDebugInfo()) {
4551       llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
4552           CGF.CapturedStmtInfo->getCaptureFields();
4553       llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
4554       if (CaptureFields.size() && ContextValue) {
4555         unsigned CharWidth = CGF.getContext().getCharWidth();
4556         // The shared variables are packed together as members of structure.
4557         // So the address of each shared variable can be computed by adding
4558         // offset of it (within record) to the base address of record. For each
4559         // shared variable, debug intrinsic llvm.dbg.declare is generated with
4560         // appropriate expressions (DIExpression).
4561         // Ex:
4562         //  %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4563         //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
4564         //            metadata !svar1,
4565         //            metadata !DIExpression(DW_OP_deref))
4566         //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
4567         //            metadata !svar2,
4568         //            metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4569         for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
4570           const VarDecl *SharedVar = It->first;
4571           RecordDecl *CaptureRecord = It->second->getParent();
4572           const ASTRecordLayout &Layout =
4573               CGF.getContext().getASTRecordLayout(CaptureRecord);
4574           unsigned Offset =
4575               Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
4576           if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4577             (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
4578                                                 CGF.Builder, false);
4579           llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
4580           // Get the call dbg.declare instruction we just created and update
4581           // its DIExpression to add offset to base address.
4582           if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) {
4583             SmallVector<uint64_t, 8> Ops;
4584             // Add offset to the base address if non zero.
4585             if (Offset) {
4586               Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
4587               Ops.push_back(Offset);
4588             }
4589             Ops.push_back(llvm::dwarf::DW_OP_deref);
4590             auto &Ctx = DDI->getContext();
4591             llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops);
4592             Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr));
4593           }
4594         }
4595       }
4596     }
4597     llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4598     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
4599         !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
4600       enum { PrivatesParam = 2, CopyFnParam = 3 };
4601       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4602           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4603       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4604           CS->getCapturedDecl()->getParam(PrivatesParam)));
4605       // Map privates.
4606       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4607       llvm::SmallVector<llvm::Value *, 16> CallArgs;
4608       llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4609       CallArgs.push_back(PrivatesPtr);
4610       ParamTypes.push_back(PrivatesPtr->getType());
4611       for (const Expr *E : Data.PrivateVars) {
4612         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4613         Address PrivatePtr = CGF.CreateMemTemp(
4614             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4615         PrivatePtrs.emplace_back(VD, PrivatePtr);
4616         CallArgs.push_back(PrivatePtr.getPointer());
4617         ParamTypes.push_back(PrivatePtr.getType());
4618       }
4619       for (const Expr *E : Data.FirstprivateVars) {
4620         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4621         Address PrivatePtr =
4622             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4623                               ".firstpriv.ptr.addr");
4624         PrivatePtrs.emplace_back(VD, PrivatePtr);
4625         FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4626         CallArgs.push_back(PrivatePtr.getPointer());
4627         ParamTypes.push_back(PrivatePtr.getType());
4628       }
4629       for (const Expr *E : Data.LastprivateVars) {
4630         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4631         Address PrivatePtr =
4632             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4633                               ".lastpriv.ptr.addr");
4634         PrivatePtrs.emplace_back(VD, PrivatePtr);
4635         CallArgs.push_back(PrivatePtr.getPointer());
4636         ParamTypes.push_back(PrivatePtr.getType());
4637       }
4638       for (const VarDecl *VD : Data.PrivateLocals) {
4639         QualType Ty = VD->getType().getNonReferenceType();
4640         if (VD->getType()->isLValueReferenceType())
4641           Ty = CGF.getContext().getPointerType(Ty);
4642         if (isAllocatableDecl(VD))
4643           Ty = CGF.getContext().getPointerType(Ty);
4644         Address PrivatePtr = CGF.CreateMemTemp(
4645             CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4646         auto Result = UntiedLocalVars.insert(
4647             std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4648         // If key exists update in place.
4649         if (Result.second == false)
4650           *Result.first = std::make_pair(
4651               VD, std::make_pair(PrivatePtr, Address::invalid()));
4652         CallArgs.push_back(PrivatePtr.getPointer());
4653         ParamTypes.push_back(PrivatePtr.getType());
4654       }
4655       auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4656                                                ParamTypes, /*isVarArg=*/false);
4657       CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4658           CopyFn, CopyFnTy->getPointerTo());
4659       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4660           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4661       for (const auto &Pair : LastprivateDstsOrigs) {
4662         const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4663         DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4664                         /*RefersToEnclosingVariableOrCapture=*/
4665                         CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4666                         Pair.second->getType(), VK_LValue,
4667                         Pair.second->getExprLoc());
4668         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
4669           return CGF.EmitLValue(&DRE).getAddress(CGF);
4670         });
4671       }
4672       for (const auto &Pair : PrivatePtrs) {
4673         Address Replacement =
4674             Address::deprecated(CGF.Builder.CreateLoad(Pair.second),
4675                                 CGF.getContext().getDeclAlign(Pair.first));
4676         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4677         if (auto *DI = CGF.getDebugInfo())
4678           if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4679             (void)DI->EmitDeclareOfAutoVariable(
4680                 Pair.first, Pair.second.getPointer(), CGF.Builder,
4681                 /*UsePointerValue*/ true);
4682       }
4683       // Adjust mapping for internal locals by mapping actual memory instead of
4684       // a pointer to this memory.
4685       for (auto &Pair : UntiedLocalVars) {
4686         if (isAllocatableDecl(Pair.first)) {
4687           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4688           Address Replacement = Address::deprecated(Ptr, CGF.getPointerAlign());
4689           Pair.second.first = Replacement;
4690           Ptr = CGF.Builder.CreateLoad(Replacement);
4691           Replacement = Address::deprecated(
4692               Ptr, CGF.getContext().getDeclAlign(Pair.first));
4693           Pair.second.second = Replacement;
4694         } else {
4695           llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4696           Address Replacement = Address::deprecated(
4697               Ptr, CGF.getContext().getDeclAlign(Pair.first));
4698           Pair.second.first = Replacement;
4699         }
4700       }
4701     }
4702     if (Data.Reductions) {
4703       OMPPrivateScope FirstprivateScope(CGF);
4704       for (const auto &Pair : FirstprivatePtrs) {
4705         Address Replacement =
4706             Address::deprecated(CGF.Builder.CreateLoad(Pair.second),
4707                                 CGF.getContext().getDeclAlign(Pair.first));
4708         FirstprivateScope.addPrivate(Pair.first,
4709                                      [Replacement]() { return Replacement; });
4710       }
4711       (void)FirstprivateScope.Privatize();
4712       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4713       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4714                              Data.ReductionCopies, Data.ReductionOps);
4715       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4716           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4717       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4718         RedCG.emitSharedOrigLValue(CGF, Cnt);
4719         RedCG.emitAggregateType(CGF, Cnt);
4720         // FIXME: This must removed once the runtime library is fixed.
4721         // Emit required threadprivate variables for
4722         // initializer/combiner/finalizer.
4723         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4724                                                            RedCG, Cnt);
4725         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4726             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4727         Replacement = Address::deprecated(
4728             CGF.EmitScalarConversion(Replacement.getPointer(),
4729                                      CGF.getContext().VoidPtrTy,
4730                                      CGF.getContext().getPointerType(
4731                                          Data.ReductionCopies[Cnt]->getType()),
4732                                      Data.ReductionCopies[Cnt]->getExprLoc()),
4733             Replacement.getAlignment());
4734         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4735         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
4736                          [Replacement]() { return Replacement; });
4737       }
4738     }
4739     // Privatize all private variables except for in_reduction items.
4740     (void)Scope.Privatize();
4741     SmallVector<const Expr *, 4> InRedVars;
4742     SmallVector<const Expr *, 4> InRedPrivs;
4743     SmallVector<const Expr *, 4> InRedOps;
4744     SmallVector<const Expr *, 4> TaskgroupDescriptors;
4745     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4746       auto IPriv = C->privates().begin();
4747       auto IRed = C->reduction_ops().begin();
4748       auto ITD = C->taskgroup_descriptors().begin();
4749       for (const Expr *Ref : C->varlists()) {
4750         InRedVars.emplace_back(Ref);
4751         InRedPrivs.emplace_back(*IPriv);
4752         InRedOps.emplace_back(*IRed);
4753         TaskgroupDescriptors.emplace_back(*ITD);
4754         std::advance(IPriv, 1);
4755         std::advance(IRed, 1);
4756         std::advance(ITD, 1);
4757       }
4758     }
4759     // Privatize in_reduction items here, because taskgroup descriptors must be
4760     // privatized earlier.
4761     OMPPrivateScope InRedScope(CGF);
4762     if (!InRedVars.empty()) {
4763       ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4764       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4765         RedCG.emitSharedOrigLValue(CGF, Cnt);
4766         RedCG.emitAggregateType(CGF, Cnt);
4767         // The taskgroup descriptor variable is always implicit firstprivate and
4768         // privatized already during processing of the firstprivates.
4769         // FIXME: This must removed once the runtime library is fixed.
4770         // Emit required threadprivate variables for
4771         // initializer/combiner/finalizer.
4772         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4773                                                            RedCG, Cnt);
4774         llvm::Value *ReductionsPtr;
4775         if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4776           ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4777                                                TRExpr->getExprLoc());
4778         } else {
4779           ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4780         }
4781         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4782             CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4783         Replacement = Address::deprecated(
4784             CGF.EmitScalarConversion(
4785                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4786                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4787                 InRedPrivs[Cnt]->getExprLoc()),
4788             Replacement.getAlignment());
4789         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4790         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
4791                               [Replacement]() { return Replacement; });
4792       }
4793     }
4794     (void)InRedScope.Privatize();
4795 
4796     CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4797                                                              UntiedLocalVars);
4798     Action.Enter(CGF);
4799     BodyGen(CGF);
4800   };
4801   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4802       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4803       Data.NumberOfParts);
4804   OMPLexicalScope Scope(*this, S, llvm::None,
4805                         !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4806                             !isOpenMPSimdDirective(S.getDirectiveKind()));
4807   TaskGen(*this, OutlinedFn, Data);
4808 }
4809 
4810 static ImplicitParamDecl *
4811 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4812                                   QualType Ty, CapturedDecl *CD,
4813                                   SourceLocation Loc) {
4814   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4815                                            ImplicitParamDecl::Other);
4816   auto *OrigRef = DeclRefExpr::Create(
4817       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4818       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4819   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4820                                               ImplicitParamDecl::Other);
4821   auto *PrivateRef = DeclRefExpr::Create(
4822       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4823       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4824   QualType ElemType = C.getBaseElementType(Ty);
4825   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4826                                            ImplicitParamDecl::Other);
4827   auto *InitRef = DeclRefExpr::Create(
4828       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4829       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4830   PrivateVD->setInitStyle(VarDecl::CInit);
4831   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4832                                               InitRef, /*BasePath=*/nullptr,
4833                                               VK_PRValue, FPOptionsOverride()));
4834   Data.FirstprivateVars.emplace_back(OrigRef);
4835   Data.FirstprivateCopies.emplace_back(PrivateRef);
4836   Data.FirstprivateInits.emplace_back(InitRef);
4837   return OrigVD;
4838 }
4839 
4840 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4841     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4842     OMPTargetDataInfo &InputInfo) {
4843   // Emit outlined function for task construct.
4844   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4845   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4846   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4847   auto I = CS->getCapturedDecl()->param_begin();
4848   auto PartId = std::next(I);
4849   auto TaskT = std::next(I, 4);
4850   OMPTaskDataTy Data;
4851   // The task is not final.
4852   Data.Final.setInt(/*IntVal=*/false);
4853   // Get list of firstprivate variables.
4854   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4855     auto IRef = C->varlist_begin();
4856     auto IElemInitRef = C->inits().begin();
4857     for (auto *IInit : C->private_copies()) {
4858       Data.FirstprivateVars.push_back(*IRef);
4859       Data.FirstprivateCopies.push_back(IInit);
4860       Data.FirstprivateInits.push_back(*IElemInitRef);
4861       ++IRef;
4862       ++IElemInitRef;
4863     }
4864   }
4865   OMPPrivateScope TargetScope(*this);
4866   VarDecl *BPVD = nullptr;
4867   VarDecl *PVD = nullptr;
4868   VarDecl *SVD = nullptr;
4869   VarDecl *MVD = nullptr;
4870   if (InputInfo.NumberOfTargetItems > 0) {
4871     auto *CD = CapturedDecl::Create(
4872         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4873     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4874     QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
4875         getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4876         /*IndexTypeQuals=*/0);
4877     BPVD = createImplicitFirstprivateForType(
4878         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4879     PVD = createImplicitFirstprivateForType(
4880         getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4881     QualType SizesType = getContext().getConstantArrayType(
4882         getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4883         ArrSize, nullptr, ArrayType::Normal,
4884         /*IndexTypeQuals=*/0);
4885     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4886                                             S.getBeginLoc());
4887     TargetScope.addPrivate(
4888         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
4889     TargetScope.addPrivate(PVD,
4890                            [&InputInfo]() { return InputInfo.PointersArray; });
4891     TargetScope.addPrivate(SVD,
4892                            [&InputInfo]() { return InputInfo.SizesArray; });
4893     // If there is no user-defined mapper, the mapper array will be nullptr. In
4894     // this case, we don't need to privatize it.
4895     if (!isa_and_nonnull<llvm::ConstantPointerNull>(
4896             InputInfo.MappersArray.getPointer())) {
4897       MVD = createImplicitFirstprivateForType(
4898           getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4899       TargetScope.addPrivate(MVD,
4900                              [&InputInfo]() { return InputInfo.MappersArray; });
4901     }
4902   }
4903   (void)TargetScope.Privatize();
4904   // Build list of dependences.
4905   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4906     OMPTaskDataTy::DependData &DD =
4907         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4908     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4909   }
4910   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
4911                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
4912     // Set proper addresses for generated private copies.
4913     OMPPrivateScope Scope(CGF);
4914     if (!Data.FirstprivateVars.empty()) {
4915       enum { PrivatesParam = 2, CopyFnParam = 3 };
4916       llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4917           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4918       llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4919           CS->getCapturedDecl()->getParam(PrivatesParam)));
4920       // Map privates.
4921       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4922       llvm::SmallVector<llvm::Value *, 16> CallArgs;
4923       llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4924       CallArgs.push_back(PrivatesPtr);
4925       ParamTypes.push_back(PrivatesPtr->getType());
4926       for (const Expr *E : Data.FirstprivateVars) {
4927         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4928         Address PrivatePtr =
4929             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4930                               ".firstpriv.ptr.addr");
4931         PrivatePtrs.emplace_back(VD, PrivatePtr);
4932         CallArgs.push_back(PrivatePtr.getPointer());
4933         ParamTypes.push_back(PrivatePtr.getType());
4934       }
4935       auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4936                                                ParamTypes, /*isVarArg=*/false);
4937       CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4938           CopyFn, CopyFnTy->getPointerTo());
4939       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4940           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4941       for (const auto &Pair : PrivatePtrs) {
4942         Address Replacement =
4943             Address::deprecated(CGF.Builder.CreateLoad(Pair.second),
4944                                 CGF.getContext().getDeclAlign(Pair.first));
4945         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4946       }
4947     }
4948     // Privatize all private variables except for in_reduction items.
4949     (void)Scope.Privatize();
4950     if (InputInfo.NumberOfTargetItems > 0) {
4951       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
4952           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
4953       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
4954           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
4955       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
4956           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4957       // If MVD is nullptr, the mapper array is not privatized
4958       if (MVD)
4959         InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
4960             CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
4961     }
4962 
4963     Action.Enter(CGF);
4964     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
4965     BodyGen(CGF);
4966   };
4967   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4968       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
4969       Data.NumberOfParts);
4970   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
4971   IntegerLiteral IfCond(getContext(), TrueOrFalse,
4972                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
4973                         SourceLocation());
4974 
4975   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
4976                                       SharedsTy, CapturedStruct, &IfCond, Data);
4977 }
4978 
4979 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4980   // Emit outlined function for task construct.
4981   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4982   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4983   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4984   const Expr *IfCond = nullptr;
4985   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4986     if (C->getNameModifier() == OMPD_unknown ||
4987         C->getNameModifier() == OMPD_task) {
4988       IfCond = C->getCondition();
4989       break;
4990     }
4991   }
4992 
4993   OMPTaskDataTy Data;
4994   // Check if we should emit tied or untied task.
4995   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4996   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4997     CGF.EmitStmt(CS->getCapturedStmt());
4998   };
4999   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5000                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5001                             const OMPTaskDataTy &Data) {
5002     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
5003                                             SharedsTy, CapturedStruct, IfCond,
5004                                             Data);
5005   };
5006   auto LPCRegion =
5007       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
5008   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
5009 }
5010 
5011 void CodeGenFunction::EmitOMPTaskyieldDirective(
5012     const OMPTaskyieldDirective &S) {
5013   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
5014 }
5015 
5016 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5017   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
5018 }
5019 
5020 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5021   OMPTaskDataTy Data;
5022   // Build list of dependences
5023   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
5024     OMPTaskDataTy::DependData &DD =
5025         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
5026     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
5027   }
5028   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
5029 }
5030 
5031 void CodeGenFunction::EmitOMPTaskgroupDirective(
5032     const OMPTaskgroupDirective &S) {
5033   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5034     Action.Enter(CGF);
5035     if (const Expr *E = S.getReductionRef()) {
5036       SmallVector<const Expr *, 4> LHSs;
5037       SmallVector<const Expr *, 4> RHSs;
5038       OMPTaskDataTy Data;
5039       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5040         Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5041         Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5042         Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5043         Data.ReductionOps.append(C->reduction_ops().begin(),
5044                                  C->reduction_ops().end());
5045         LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5046         RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5047       }
5048       llvm::Value *ReductionDesc =
5049           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
5050                                                            LHSs, RHSs, Data);
5051       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5052       CGF.EmitVarDecl(*VD);
5053       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
5054                             /*Volatile=*/false, E->getType());
5055     }
5056     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5057   };
5058   OMPLexicalScope Scope(*this, S, OMPD_unknown);
5059   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
5060 }
5061 
5062 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5063   llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5064                                 ? llvm::AtomicOrdering::NotAtomic
5065                                 : llvm::AtomicOrdering::AcquireRelease;
5066   CGM.getOpenMPRuntime().emitFlush(
5067       *this,
5068       [&S]() -> ArrayRef<const Expr *> {
5069         if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5070           return llvm::makeArrayRef(FlushClause->varlist_begin(),
5071                                     FlushClause->varlist_end());
5072         return llvm::None;
5073       }(),
5074       S.getBeginLoc(), AO);
5075 }
5076 
5077 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5078   const auto *DO = S.getSingleClause<OMPDepobjClause>();
5079   LValue DOLVal = EmitLValue(DO->getDepobj());
5080   if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5081     OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5082                                            DC->getModifier());
5083     Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
5084     Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5085         *this, Dependencies, DC->getBeginLoc());
5086     EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
5087     return;
5088   }
5089   if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5090     CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
5091     return;
5092   }
5093   if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5094     CGM.getOpenMPRuntime().emitUpdateClause(
5095         *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
5096     return;
5097   }
5098 }
5099 
5100 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
5101   if (!OMPParentLoopDirectiveForScan)
5102     return;
5103   const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
5104   bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5105   SmallVector<const Expr *, 4> Shareds;
5106   SmallVector<const Expr *, 4> Privates;
5107   SmallVector<const Expr *, 4> LHSs;
5108   SmallVector<const Expr *, 4> RHSs;
5109   SmallVector<const Expr *, 4> ReductionOps;
5110   SmallVector<const Expr *, 4> CopyOps;
5111   SmallVector<const Expr *, 4> CopyArrayTemps;
5112   SmallVector<const Expr *, 4> CopyArrayElems;
5113   for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5114     if (C->getModifier() != OMPC_REDUCTION_inscan)
5115       continue;
5116     Shareds.append(C->varlist_begin(), C->varlist_end());
5117     Privates.append(C->privates().begin(), C->privates().end());
5118     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5119     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5120     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
5121     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
5122     CopyArrayTemps.append(C->copy_array_temps().begin(),
5123                           C->copy_array_temps().end());
5124     CopyArrayElems.append(C->copy_array_elems().begin(),
5125                           C->copy_array_elems().end());
5126   }
5127   if (ParentDir.getDirectiveKind() == OMPD_simd ||
5128       (getLangOpts().OpenMPSimd &&
5129        isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
5130     // For simd directive and simd-based directives in simd only mode, use the
5131     // following codegen:
5132     // int x = 0;
5133     // #pragma omp simd reduction(inscan, +: x)
5134     // for (..) {
5135     //   <first part>
5136     //   #pragma omp scan inclusive(x)
5137     //   <second part>
5138     //  }
5139     // is transformed to:
5140     // int x = 0;
5141     // for (..) {
5142     //   int x_priv = 0;
5143     //   <first part>
5144     //   x = x_priv + x;
5145     //   x_priv = x;
5146     //   <second part>
5147     // }
5148     // and
5149     // int x = 0;
5150     // #pragma omp simd reduction(inscan, +: x)
5151     // for (..) {
5152     //   <first part>
5153     //   #pragma omp scan exclusive(x)
5154     //   <second part>
5155     // }
5156     // to
5157     // int x = 0;
5158     // for (..) {
5159     //   int x_priv = 0;
5160     //   <second part>
5161     //   int temp = x;
5162     //   x = x_priv + x;
5163     //   x_priv = temp;
5164     //   <first part>
5165     // }
5166     llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
5167     EmitBranch(IsInclusive
5168                    ? OMPScanReduce
5169                    : BreakContinueStack.back().ContinueBlock.getBlock());
5170     EmitBlock(OMPScanDispatch);
5171     {
5172       // New scope for correct construction/destruction of temp variables for
5173       // exclusive scan.
5174       LexicalScope Scope(*this, S.getSourceRange());
5175       EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5176       EmitBlock(OMPScanReduce);
5177       if (!IsInclusive) {
5178         // Create temp var and copy LHS value to this temp value.
5179         // TMP = LHS;
5180         for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5181           const Expr *PrivateExpr = Privates[I];
5182           const Expr *TempExpr = CopyArrayTemps[I];
5183           EmitAutoVarDecl(
5184               *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
5185           LValue DestLVal = EmitLValue(TempExpr);
5186           LValue SrcLVal = EmitLValue(LHSs[I]);
5187           EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5188                       SrcLVal.getAddress(*this),
5189                       cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5190                       cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5191                       CopyOps[I]);
5192         }
5193       }
5194       CGM.getOpenMPRuntime().emitReduction(
5195           *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
5196           {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
5197       for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5198         const Expr *PrivateExpr = Privates[I];
5199         LValue DestLVal;
5200         LValue SrcLVal;
5201         if (IsInclusive) {
5202           DestLVal = EmitLValue(RHSs[I]);
5203           SrcLVal = EmitLValue(LHSs[I]);
5204         } else {
5205           const Expr *TempExpr = CopyArrayTemps[I];
5206           DestLVal = EmitLValue(RHSs[I]);
5207           SrcLVal = EmitLValue(TempExpr);
5208         }
5209         EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5210                     SrcLVal.getAddress(*this),
5211                     cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5212                     cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5213                     CopyOps[I]);
5214       }
5215     }
5216     EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5217     OMPScanExitBlock = IsInclusive
5218                            ? BreakContinueStack.back().ContinueBlock.getBlock()
5219                            : OMPScanReduce;
5220     EmitBlock(OMPAfterScanBlock);
5221     return;
5222   }
5223   if (!IsInclusive) {
5224     EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5225     EmitBlock(OMPScanExitBlock);
5226   }
5227   if (OMPFirstScanLoop) {
5228     // Emit buffer[i] = red; at the end of the input phase.
5229     const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5230                              .getIterationVariable()
5231                              ->IgnoreParenImpCasts();
5232     LValue IdxLVal = EmitLValue(IVExpr);
5233     llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5234     IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5235     for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5236       const Expr *PrivateExpr = Privates[I];
5237       const Expr *OrigExpr = Shareds[I];
5238       const Expr *CopyArrayElem = CopyArrayElems[I];
5239       OpaqueValueMapping IdxMapping(
5240           *this,
5241           cast<OpaqueValueExpr>(
5242               cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5243           RValue::get(IdxVal));
5244       LValue DestLVal = EmitLValue(CopyArrayElem);
5245       LValue SrcLVal = EmitLValue(OrigExpr);
5246       EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5247                   SrcLVal.getAddress(*this),
5248                   cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5249                   cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5250                   CopyOps[I]);
5251     }
5252   }
5253   EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5254   if (IsInclusive) {
5255     EmitBlock(OMPScanExitBlock);
5256     EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5257   }
5258   EmitBlock(OMPScanDispatch);
5259   if (!OMPFirstScanLoop) {
5260     // Emit red = buffer[i]; at the entrance to the scan phase.
5261     const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5262                              .getIterationVariable()
5263                              ->IgnoreParenImpCasts();
5264     LValue IdxLVal = EmitLValue(IVExpr);
5265     llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5266     IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5267     llvm::BasicBlock *ExclusiveExitBB = nullptr;
5268     if (!IsInclusive) {
5269       llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
5270       ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
5271       llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
5272       Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
5273       EmitBlock(ContBB);
5274       // Use idx - 1 iteration for exclusive scan.
5275       IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
5276     }
5277     for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5278       const Expr *PrivateExpr = Privates[I];
5279       const Expr *OrigExpr = Shareds[I];
5280       const Expr *CopyArrayElem = CopyArrayElems[I];
5281       OpaqueValueMapping IdxMapping(
5282           *this,
5283           cast<OpaqueValueExpr>(
5284               cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5285           RValue::get(IdxVal));
5286       LValue SrcLVal = EmitLValue(CopyArrayElem);
5287       LValue DestLVal = EmitLValue(OrigExpr);
5288       EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5289                   SrcLVal.getAddress(*this),
5290                   cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5291                   cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5292                   CopyOps[I]);
5293     }
5294     if (!IsInclusive) {
5295       EmitBlock(ExclusiveExitBB);
5296     }
5297   }
5298   EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5299                                                : OMPAfterScanBlock);
5300   EmitBlock(OMPAfterScanBlock);
5301 }
5302 
5303 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5304                                             const CodeGenLoopTy &CodeGenLoop,
5305                                             Expr *IncExpr) {
5306   // Emit the loop iteration variable.
5307   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
5308   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
5309   EmitVarDecl(*IVDecl);
5310 
5311   // Emit the iterations count variable.
5312   // If it is not a variable, Sema decided to calculate iterations count on each
5313   // iteration (e.g., it is foldable into a constant).
5314   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
5315     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
5316     // Emit calculation of the iterations count.
5317     EmitIgnoredExpr(S.getCalcLastIteration());
5318   }
5319 
5320   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5321 
5322   bool HasLastprivateClause = false;
5323   // Check pre-condition.
5324   {
5325     OMPLoopScope PreInitScope(*this, S);
5326     // Skip the entire loop if we don't meet the precondition.
5327     // If the condition constant folds and can be elided, avoid emitting the
5328     // whole loop.
5329     bool CondConstant;
5330     llvm::BasicBlock *ContBlock = nullptr;
5331     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
5332       if (!CondConstant)
5333         return;
5334     } else {
5335       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
5336       ContBlock = createBasicBlock("omp.precond.end");
5337       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
5338                   getProfileCount(&S));
5339       EmitBlock(ThenBlock);
5340       incrementProfileCounter(&S);
5341     }
5342 
5343     emitAlignedClause(*this, S);
5344     // Emit 'then' code.
5345     {
5346       // Emit helper vars inits.
5347 
5348       LValue LB = EmitOMPHelperVar(
5349           *this, cast<DeclRefExpr>(
5350                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5351                           ? S.getCombinedLowerBoundVariable()
5352                           : S.getLowerBoundVariable())));
5353       LValue UB = EmitOMPHelperVar(
5354           *this, cast<DeclRefExpr>(
5355                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5356                           ? S.getCombinedUpperBoundVariable()
5357                           : S.getUpperBoundVariable())));
5358       LValue ST =
5359           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
5360       LValue IL =
5361           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
5362 
5363       OMPPrivateScope LoopScope(*this);
5364       if (EmitOMPFirstprivateClause(S, LoopScope)) {
5365         // Emit implicit barrier to synchronize threads and avoid data races
5366         // on initialization of firstprivate variables and post-update of
5367         // lastprivate variables.
5368         CGM.getOpenMPRuntime().emitBarrierCall(
5369             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5370             /*ForceSimpleCall=*/true);
5371       }
5372       EmitOMPPrivateClause(S, LoopScope);
5373       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5374           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5375           !isOpenMPTeamsDirective(S.getDirectiveKind()))
5376         EmitOMPReductionClauseInit(S, LoopScope);
5377       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5378       EmitOMPPrivateLoopCounters(S, LoopScope);
5379       (void)LoopScope.Privatize();
5380       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5381         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5382 
5383       // Detect the distribute schedule kind and chunk.
5384       llvm::Value *Chunk = nullptr;
5385       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5386       if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5387         ScheduleKind = C->getDistScheduleKind();
5388         if (const Expr *Ch = C->getChunkSize()) {
5389           Chunk = EmitScalarExpr(Ch);
5390           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
5391                                        S.getIterationVariable()->getType(),
5392                                        S.getBeginLoc());
5393         }
5394       } else {
5395         // Default behaviour for dist_schedule clause.
5396         CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5397             *this, S, ScheduleKind, Chunk);
5398       }
5399       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5400       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5401 
5402       // OpenMP [2.10.8, distribute Construct, Description]
5403       // If dist_schedule is specified, kind must be static. If specified,
5404       // iterations are divided into chunks of size chunk_size, chunks are
5405       // assigned to the teams of the league in a round-robin fashion in the
5406       // order of the team number. When no chunk_size is specified, the
5407       // iteration space is divided into chunks that are approximately equal
5408       // in size, and at most one chunk is distributed to each team of the
5409       // league. The size of the chunks is unspecified in this case.
5410       bool StaticChunked =
5411           RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5412           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5413       if (RT.isStaticNonchunked(ScheduleKind,
5414                                 /* Chunked */ Chunk != nullptr) ||
5415           StaticChunked) {
5416         CGOpenMPRuntime::StaticRTInput StaticInit(
5417             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
5418             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5419             StaticChunked ? Chunk : nullptr);
5420         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5421                                     StaticInit);
5422         JumpDest LoopExit =
5423             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5424         // UB = min(UB, GlobalUB);
5425         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5426                             ? S.getCombinedEnsureUpperBound()
5427                             : S.getEnsureUpperBound());
5428         // IV = LB;
5429         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5430                             ? S.getCombinedInit()
5431                             : S.getInit());
5432 
5433         const Expr *Cond =
5434             isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5435                 ? S.getCombinedCond()
5436                 : S.getCond();
5437 
5438         if (StaticChunked)
5439           Cond = S.getCombinedDistCond();
5440 
5441         // For static unchunked schedules generate:
5442         //
5443         //  1. For distribute alone, codegen
5444         //    while (idx <= UB) {
5445         //      BODY;
5446         //      ++idx;
5447         //    }
5448         //
5449         //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
5450         //    while (idx <= UB) {
5451         //      <CodeGen rest of pragma>(LB, UB);
5452         //      idx += ST;
5453         //    }
5454         //
5455         // For static chunk one schedule generate:
5456         //
5457         // while (IV <= GlobalUB) {
5458         //   <CodeGen rest of pragma>(LB, UB);
5459         //   LB += ST;
5460         //   UB += ST;
5461         //   UB = min(UB, GlobalUB);
5462         //   IV = LB;
5463         // }
5464         //
5465         emitCommonSimdLoop(
5466             *this, S,
5467             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5468               if (isOpenMPSimdDirective(S.getDirectiveKind()))
5469                 CGF.EmitOMPSimdInit(S);
5470             },
5471             [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5472              StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5473               CGF.EmitOMPInnerLoop(
5474                   S, LoopScope.requiresCleanups(), Cond, IncExpr,
5475                   [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5476                     CodeGenLoop(CGF, S, LoopExit);
5477                   },
5478                   [&S, StaticChunked](CodeGenFunction &CGF) {
5479                     if (StaticChunked) {
5480                       CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5481                       CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5482                       CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5483                       CGF.EmitIgnoredExpr(S.getCombinedInit());
5484                     }
5485                   });
5486             });
5487         EmitBlock(LoopExit.getBlock());
5488         // Tell the runtime we are done.
5489         RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
5490       } else {
5491         // Emit the outer loop, which requests its work chunk [LB..UB] from
5492         // runtime and runs the inner loop to process it.
5493         const OMPLoopArguments LoopArguments = {
5494             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5495             IL.getAddress(*this), Chunk};
5496         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5497                                    CodeGenLoop);
5498       }
5499       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5500         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5501           return CGF.Builder.CreateIsNotNull(
5502               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5503         });
5504       }
5505       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5506           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5507           !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5508         EmitOMPReductionClauseFinal(S, OMPD_simd);
5509         // Emit post-update of the reduction variables if IsLastIter != 0.
5510         emitPostUpdateForReductionClause(
5511             *this, S, [IL, &S](CodeGenFunction &CGF) {
5512               return CGF.Builder.CreateIsNotNull(
5513                   CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5514             });
5515       }
5516       // Emit final copy of the lastprivate variables if IsLastIter != 0.
5517       if (HasLastprivateClause) {
5518         EmitOMPLastprivateClauseFinal(
5519             S, /*NoFinals=*/false,
5520             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5521       }
5522     }
5523 
5524     // We're now done with the loop, so jump to the continuation block.
5525     if (ContBlock) {
5526       EmitBranch(ContBlock);
5527       EmitBlock(ContBlock, true);
5528     }
5529   }
5530 }
5531 
5532 void CodeGenFunction::EmitOMPDistributeDirective(
5533     const OMPDistributeDirective &S) {
5534   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5535     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5536   };
5537   OMPLexicalScope Scope(*this, S, OMPD_unknown);
5538   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5539 }
5540 
5541 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5542                                                    const CapturedStmt *S,
5543                                                    SourceLocation Loc) {
5544   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5545   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5546   CGF.CapturedStmtInfo = &CapStmtInfo;
5547   llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5548   Fn->setDoesNotRecurse();
5549   return Fn;
5550 }
5551 
5552 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5553   if (CGM.getLangOpts().OpenMPIRBuilder) {
5554     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5555     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5556 
5557     if (S.hasClausesOfKind<OMPDependClause>()) {
5558       // The ordered directive with depend clause.
5559       assert(!S.hasAssociatedStmt() &&
5560              "No associated statement must be in ordered depend construct.");
5561       InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5562                              AllocaInsertPt->getIterator());
5563       for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) {
5564         unsigned NumLoops = DC->getNumLoops();
5565         QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(
5566             /*DestWidth=*/64, /*Signed=*/1);
5567         llvm::SmallVector<llvm::Value *> StoreValues;
5568         for (unsigned I = 0; I < NumLoops; I++) {
5569           const Expr *CounterVal = DC->getLoopData(I);
5570           assert(CounterVal);
5571           llvm::Value *StoreValue = EmitScalarConversion(
5572               EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
5573               CounterVal->getExprLoc());
5574           StoreValues.emplace_back(StoreValue);
5575         }
5576         bool IsDependSource = false;
5577         if (DC->getDependencyKind() == OMPC_DEPEND_source)
5578           IsDependSource = true;
5579         Builder.restoreIP(OMPBuilder.createOrderedDepend(
5580             Builder, AllocaIP, NumLoops, StoreValues, ".cnt.addr",
5581             IsDependSource));
5582       }
5583     } else {
5584       // The ordered directive with threads or simd clause, or without clause.
5585       // Without clause, it behaves as if the threads clause is specified.
5586       const auto *C = S.getSingleClause<OMPSIMDClause>();
5587 
5588       auto FiniCB = [this](InsertPointTy IP) {
5589         OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
5590       };
5591 
5592       auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
5593                                      InsertPointTy CodeGenIP,
5594                                      llvm::BasicBlock &FiniBB) {
5595         const CapturedStmt *CS = S.getInnermostCapturedStmt();
5596         if (C) {
5597           llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5598           GenerateOpenMPCapturedVars(*CS, CapturedVars);
5599           llvm::Function *OutlinedFn =
5600               emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5601           assert(S.getBeginLoc().isValid() &&
5602                  "Outlined function call location must be valid.");
5603           ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
5604           OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, FiniBB,
5605                                                OutlinedFn, CapturedVars);
5606         } else {
5607           OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
5608                                                          FiniBB);
5609           OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CS->getCapturedStmt(),
5610                                                  CodeGenIP, FiniBB);
5611         }
5612       };
5613 
5614       OMPLexicalScope Scope(*this, S, OMPD_unknown);
5615       Builder.restoreIP(
5616           OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
5617     }
5618     return;
5619   }
5620 
5621   if (S.hasClausesOfKind<OMPDependClause>()) {
5622     assert(!S.hasAssociatedStmt() &&
5623            "No associated statement must be in ordered depend construct.");
5624     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5625       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5626     return;
5627   }
5628   const auto *C = S.getSingleClause<OMPSIMDClause>();
5629   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5630                                  PrePostActionTy &Action) {
5631     const CapturedStmt *CS = S.getInnermostCapturedStmt();
5632     if (C) {
5633       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5634       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5635       llvm::Function *OutlinedFn =
5636           emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5637       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
5638                                                       OutlinedFn, CapturedVars);
5639     } else {
5640       Action.Enter(CGF);
5641       CGF.EmitStmt(CS->getCapturedStmt());
5642     }
5643   };
5644   OMPLexicalScope Scope(*this, S, OMPD_unknown);
5645   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
5646 }
5647 
5648 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
5649                                          QualType SrcType, QualType DestType,
5650                                          SourceLocation Loc) {
5651   assert(CGF.hasScalarEvaluationKind(DestType) &&
5652          "DestType must have scalar evaluation kind.");
5653   assert(!Val.isAggregate() && "Must be a scalar or complex.");
5654   return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
5655                                                    DestType, Loc)
5656                         : CGF.EmitComplexToScalarConversion(
5657                               Val.getComplexVal(), SrcType, DestType, Loc);
5658 }
5659 
5660 static CodeGenFunction::ComplexPairTy
5661 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
5662                       QualType DestType, SourceLocation Loc) {
5663   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
5664          "DestType must have complex evaluation kind.");
5665   CodeGenFunction::ComplexPairTy ComplexVal;
5666   if (Val.isScalar()) {
5667     // Convert the input element to the element type of the complex.
5668     QualType DestElementType =
5669         DestType->castAs<ComplexType>()->getElementType();
5670     llvm::Value *ScalarVal = CGF.EmitScalarConversion(
5671         Val.getScalarVal(), SrcType, DestElementType, Loc);
5672     ComplexVal = CodeGenFunction::ComplexPairTy(
5673         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
5674   } else {
5675     assert(Val.isComplex() && "Must be a scalar or complex.");
5676     QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
5677     QualType DestElementType =
5678         DestType->castAs<ComplexType>()->getElementType();
5679     ComplexVal.first = CGF.EmitScalarConversion(
5680         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
5681     ComplexVal.second = CGF.EmitScalarConversion(
5682         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
5683   }
5684   return ComplexVal;
5685 }
5686 
5687 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
5688                                   LValue LVal, RValue RVal) {
5689   if (LVal.isGlobalReg())
5690     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
5691   else
5692     CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
5693 }
5694 
5695 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
5696                                    llvm::AtomicOrdering AO, LValue LVal,
5697                                    SourceLocation Loc) {
5698   if (LVal.isGlobalReg())
5699     return CGF.EmitLoadOfLValue(LVal, Loc);
5700   return CGF.EmitAtomicLoad(
5701       LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
5702       LVal.isVolatile());
5703 }
5704 
5705 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
5706                                          QualType RValTy, SourceLocation Loc) {
5707   switch (getEvaluationKind(LVal.getType())) {
5708   case TEK_Scalar:
5709     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
5710                                *this, RVal, RValTy, LVal.getType(), Loc)),
5711                            LVal);
5712     break;
5713   case TEK_Complex:
5714     EmitStoreOfComplex(
5715         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
5716         /*isInit=*/false);
5717     break;
5718   case TEK_Aggregate:
5719     llvm_unreachable("Must be a scalar or complex.");
5720   }
5721 }
5722 
5723 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
5724                                   const Expr *X, const Expr *V,
5725                                   SourceLocation Loc) {
5726   // v = x;
5727   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
5728   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
5729   LValue XLValue = CGF.EmitLValue(X);
5730   LValue VLValue = CGF.EmitLValue(V);
5731   RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
5732   // OpenMP, 2.17.7, atomic Construct
5733   // If the read or capture clause is specified and the acquire, acq_rel, or
5734   // seq_cst clause is specified then the strong flush on exit from the atomic
5735   // operation is also an acquire flush.
5736   switch (AO) {
5737   case llvm::AtomicOrdering::Acquire:
5738   case llvm::AtomicOrdering::AcquireRelease:
5739   case llvm::AtomicOrdering::SequentiallyConsistent:
5740     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5741                                          llvm::AtomicOrdering::Acquire);
5742     break;
5743   case llvm::AtomicOrdering::Monotonic:
5744   case llvm::AtomicOrdering::Release:
5745     break;
5746   case llvm::AtomicOrdering::NotAtomic:
5747   case llvm::AtomicOrdering::Unordered:
5748     llvm_unreachable("Unexpected ordering.");
5749   }
5750   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
5751   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
5752 }
5753 
5754 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
5755                                    llvm::AtomicOrdering AO, const Expr *X,
5756                                    const Expr *E, SourceLocation Loc) {
5757   // x = expr;
5758   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
5759   emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
5760   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5761   // OpenMP, 2.17.7, atomic Construct
5762   // If the write, update, or capture clause is specified and the release,
5763   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5764   // the atomic operation is also a release flush.
5765   switch (AO) {
5766   case llvm::AtomicOrdering::Release:
5767   case llvm::AtomicOrdering::AcquireRelease:
5768   case llvm::AtomicOrdering::SequentiallyConsistent:
5769     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5770                                          llvm::AtomicOrdering::Release);
5771     break;
5772   case llvm::AtomicOrdering::Acquire:
5773   case llvm::AtomicOrdering::Monotonic:
5774     break;
5775   case llvm::AtomicOrdering::NotAtomic:
5776   case llvm::AtomicOrdering::Unordered:
5777     llvm_unreachable("Unexpected ordering.");
5778   }
5779 }
5780 
5781 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
5782                                                 RValue Update,
5783                                                 BinaryOperatorKind BO,
5784                                                 llvm::AtomicOrdering AO,
5785                                                 bool IsXLHSInRHSPart) {
5786   ASTContext &Context = CGF.getContext();
5787   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
5788   // expression is simple and atomic is allowed for the given type for the
5789   // target platform.
5790   if (BO == BO_Comma || !Update.isScalar() ||
5791       !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
5792       (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
5793        (Update.getScalarVal()->getType() !=
5794         X.getAddress(CGF).getElementType())) ||
5795       !X.getAddress(CGF).getElementType()->isIntegerTy() ||
5796       !Context.getTargetInfo().hasBuiltinAtomic(
5797           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
5798     return std::make_pair(false, RValue::get(nullptr));
5799 
5800   llvm::AtomicRMWInst::BinOp RMWOp;
5801   switch (BO) {
5802   case BO_Add:
5803     RMWOp = llvm::AtomicRMWInst::Add;
5804     break;
5805   case BO_Sub:
5806     if (!IsXLHSInRHSPart)
5807       return std::make_pair(false, RValue::get(nullptr));
5808     RMWOp = llvm::AtomicRMWInst::Sub;
5809     break;
5810   case BO_And:
5811     RMWOp = llvm::AtomicRMWInst::And;
5812     break;
5813   case BO_Or:
5814     RMWOp = llvm::AtomicRMWInst::Or;
5815     break;
5816   case BO_Xor:
5817     RMWOp = llvm::AtomicRMWInst::Xor;
5818     break;
5819   case BO_LT:
5820     RMWOp = X.getType()->hasSignedIntegerRepresentation()
5821                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
5822                                    : llvm::AtomicRMWInst::Max)
5823                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
5824                                    : llvm::AtomicRMWInst::UMax);
5825     break;
5826   case BO_GT:
5827     RMWOp = X.getType()->hasSignedIntegerRepresentation()
5828                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
5829                                    : llvm::AtomicRMWInst::Min)
5830                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
5831                                    : llvm::AtomicRMWInst::UMin);
5832     break;
5833   case BO_Assign:
5834     RMWOp = llvm::AtomicRMWInst::Xchg;
5835     break;
5836   case BO_Mul:
5837   case BO_Div:
5838   case BO_Rem:
5839   case BO_Shl:
5840   case BO_Shr:
5841   case BO_LAnd:
5842   case BO_LOr:
5843     return std::make_pair(false, RValue::get(nullptr));
5844   case BO_PtrMemD:
5845   case BO_PtrMemI:
5846   case BO_LE:
5847   case BO_GE:
5848   case BO_EQ:
5849   case BO_NE:
5850   case BO_Cmp:
5851   case BO_AddAssign:
5852   case BO_SubAssign:
5853   case BO_AndAssign:
5854   case BO_OrAssign:
5855   case BO_XorAssign:
5856   case BO_MulAssign:
5857   case BO_DivAssign:
5858   case BO_RemAssign:
5859   case BO_ShlAssign:
5860   case BO_ShrAssign:
5861   case BO_Comma:
5862     llvm_unreachable("Unsupported atomic update operation");
5863   }
5864   llvm::Value *UpdateVal = Update.getScalarVal();
5865   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
5866     UpdateVal = CGF.Builder.CreateIntCast(
5867         IC, X.getAddress(CGF).getElementType(),
5868         X.getType()->hasSignedIntegerRepresentation());
5869   }
5870   llvm::Value *Res =
5871       CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
5872   return std::make_pair(true, RValue::get(Res));
5873 }
5874 
5875 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
5876     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
5877     llvm::AtomicOrdering AO, SourceLocation Loc,
5878     const llvm::function_ref<RValue(RValue)> CommonGen) {
5879   // Update expressions are allowed to have the following forms:
5880   // x binop= expr; -> xrval + expr;
5881   // x++, ++x -> xrval + 1;
5882   // x--, --x -> xrval - 1;
5883   // x = x binop expr; -> xrval binop expr
5884   // x = expr Op x; - > expr binop xrval;
5885   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
5886   if (!Res.first) {
5887     if (X.isGlobalReg()) {
5888       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
5889       // 'xrval'.
5890       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
5891     } else {
5892       // Perform compare-and-swap procedure.
5893       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
5894     }
5895   }
5896   return Res;
5897 }
5898 
5899 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
5900                                     llvm::AtomicOrdering AO, const Expr *X,
5901                                     const Expr *E, const Expr *UE,
5902                                     bool IsXLHSInRHSPart, SourceLocation Loc) {
5903   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5904          "Update expr in 'atomic update' must be a binary operator.");
5905   const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5906   // Update expressions are allowed to have the following forms:
5907   // x binop= expr; -> xrval + expr;
5908   // x++, ++x -> xrval + 1;
5909   // x--, --x -> xrval - 1;
5910   // x = x binop expr; -> xrval binop expr
5911   // x = expr Op x; - > expr binop xrval;
5912   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
5913   LValue XLValue = CGF.EmitLValue(X);
5914   RValue ExprRValue = CGF.EmitAnyExpr(E);
5915   const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5916   const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5917   const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5918   const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5919   auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
5920     CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5921     CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5922     return CGF.EmitAnyExpr(UE);
5923   };
5924   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
5925       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5926   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5927   // OpenMP, 2.17.7, atomic Construct
5928   // If the write, update, or capture clause is specified and the release,
5929   // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5930   // the atomic operation is also a release flush.
5931   switch (AO) {
5932   case llvm::AtomicOrdering::Release:
5933   case llvm::AtomicOrdering::AcquireRelease:
5934   case llvm::AtomicOrdering::SequentiallyConsistent:
5935     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5936                                          llvm::AtomicOrdering::Release);
5937     break;
5938   case llvm::AtomicOrdering::Acquire:
5939   case llvm::AtomicOrdering::Monotonic:
5940     break;
5941   case llvm::AtomicOrdering::NotAtomic:
5942   case llvm::AtomicOrdering::Unordered:
5943     llvm_unreachable("Unexpected ordering.");
5944   }
5945 }
5946 
5947 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
5948                             QualType SourceType, QualType ResType,
5949                             SourceLocation Loc) {
5950   switch (CGF.getEvaluationKind(ResType)) {
5951   case TEK_Scalar:
5952     return RValue::get(
5953         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
5954   case TEK_Complex: {
5955     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
5956     return RValue::getComplex(Res.first, Res.second);
5957   }
5958   case TEK_Aggregate:
5959     break;
5960   }
5961   llvm_unreachable("Must be a scalar or complex.");
5962 }
5963 
5964 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
5965                                      llvm::AtomicOrdering AO,
5966                                      bool IsPostfixUpdate, const Expr *V,
5967                                      const Expr *X, const Expr *E,
5968                                      const Expr *UE, bool IsXLHSInRHSPart,
5969                                      SourceLocation Loc) {
5970   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
5971   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
5972   RValue NewVVal;
5973   LValue VLValue = CGF.EmitLValue(V);
5974   LValue XLValue = CGF.EmitLValue(X);
5975   RValue ExprRValue = CGF.EmitAnyExpr(E);
5976   QualType NewVValType;
5977   if (UE) {
5978     // 'x' is updated with some additional value.
5979     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5980            "Update expr in 'atomic capture' must be a binary operator.");
5981     const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5982     // Update expressions are allowed to have the following forms:
5983     // x binop= expr; -> xrval + expr;
5984     // x++, ++x -> xrval + 1;
5985     // x--, --x -> xrval - 1;
5986     // x = x binop expr; -> xrval binop expr
5987     // x = expr Op x; - > expr binop xrval;
5988     const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5989     const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5990     const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5991     NewVValType = XRValExpr->getType();
5992     const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5993     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
5994                   IsPostfixUpdate](RValue XRValue) {
5995       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5996       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5997       RValue Res = CGF.EmitAnyExpr(UE);
5998       NewVVal = IsPostfixUpdate ? XRValue : Res;
5999       return Res;
6000     };
6001     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6002         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6003     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6004     if (Res.first) {
6005       // 'atomicrmw' instruction was generated.
6006       if (IsPostfixUpdate) {
6007         // Use old value from 'atomicrmw'.
6008         NewVVal = Res.second;
6009       } else {
6010         // 'atomicrmw' does not provide new value, so evaluate it using old
6011         // value of 'x'.
6012         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6013         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6014         NewVVal = CGF.EmitAnyExpr(UE);
6015       }
6016     }
6017   } else {
6018     // 'x' is simply rewritten with some 'expr'.
6019     NewVValType = X->getType().getNonReferenceType();
6020     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
6021                                X->getType().getNonReferenceType(), Loc);
6022     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6023       NewVVal = XRValue;
6024       return ExprRValue;
6025     };
6026     // Try to perform atomicrmw xchg, otherwise simple exchange.
6027     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6028         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6029         Loc, Gen);
6030     CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6031     if (Res.first) {
6032       // 'atomicrmw' instruction was generated.
6033       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6034     }
6035   }
6036   // Emit post-update store to 'v' of old/new 'x' value.
6037   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
6038   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6039   // OpenMP 5.1 removes the required flush for capture clause.
6040   if (CGF.CGM.getLangOpts().OpenMP < 51) {
6041     // OpenMP, 2.17.7, atomic Construct
6042     // If the write, update, or capture clause is specified and the release,
6043     // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6044     // the atomic operation is also a release flush.
6045     // If the read or capture clause is specified and the acquire, acq_rel, or
6046     // seq_cst clause is specified then the strong flush on exit from the atomic
6047     // operation is also an acquire flush.
6048     switch (AO) {
6049     case llvm::AtomicOrdering::Release:
6050       CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
6051                                            llvm::AtomicOrdering::Release);
6052       break;
6053     case llvm::AtomicOrdering::Acquire:
6054       CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
6055                                            llvm::AtomicOrdering::Acquire);
6056       break;
6057     case llvm::AtomicOrdering::AcquireRelease:
6058     case llvm::AtomicOrdering::SequentiallyConsistent:
6059       CGF.CGM.getOpenMPRuntime().emitFlush(
6060           CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease);
6061       break;
6062     case llvm::AtomicOrdering::Monotonic:
6063       break;
6064     case llvm::AtomicOrdering::NotAtomic:
6065     case llvm::AtomicOrdering::Unordered:
6066       llvm_unreachable("Unexpected ordering.");
6067     }
6068   }
6069 }
6070 
6071 static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF,
6072                                      llvm::AtomicOrdering AO, const Expr *X,
6073                                      const Expr *E, const Expr *D,
6074                                      const Expr *CE, bool IsXBinopExpr,
6075                                      SourceLocation Loc) {
6076   llvm::OpenMPIRBuilder &OMPBuilder =
6077       CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6078 
6079   OMPAtomicCompareOp Op;
6080   assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6081   switch (cast<BinaryOperator>(CE)->getOpcode()) {
6082   case BO_EQ:
6083     Op = OMPAtomicCompareOp::EQ;
6084     break;
6085   case BO_LT:
6086     Op = OMPAtomicCompareOp::MIN;
6087     break;
6088   case BO_GT:
6089     Op = OMPAtomicCompareOp::MAX;
6090     break;
6091   default:
6092     llvm_unreachable("unsupported atomic compare binary operator");
6093   }
6094 
6095   LValue XLVal = CGF.EmitLValue(X);
6096   Address XAddr = XLVal.getAddress(CGF);
6097   llvm::Value *EVal = CGF.EmitScalarExpr(E);
6098   llvm::Value *DVal = D ? CGF.EmitScalarExpr(D) : nullptr;
6099 
6100   llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6101       XAddr.getPointer(), XAddr.getElementType(),
6102       X->getType().isVolatileQualified(),
6103       X->getType()->hasSignedIntegerRepresentation()};
6104 
6105   CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6106       CGF.Builder, XOpVal, EVal, DVal, AO, Op, IsXBinopExpr));
6107 }
6108 
6109 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6110                               llvm::AtomicOrdering AO, bool IsPostfixUpdate,
6111                               const Expr *X, const Expr *V, const Expr *E,
6112                               const Expr *UE, const Expr *D, const Expr *CE,
6113                               bool IsXLHSInRHSPart, bool IsCompareCapture,
6114                               SourceLocation Loc) {
6115   switch (Kind) {
6116   case OMPC_read:
6117     emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6118     break;
6119   case OMPC_write:
6120     emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6121     break;
6122   case OMPC_unknown:
6123   case OMPC_update:
6124     emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6125     break;
6126   case OMPC_capture:
6127     emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6128                              IsXLHSInRHSPart, Loc);
6129     break;
6130   case OMPC_compare: {
6131     if (IsCompareCapture) {
6132       // Emit an error here.
6133       unsigned DiagID = CGF.CGM.getDiags().getCustomDiagID(
6134           DiagnosticsEngine::Error,
6135           "'atomic compare capture' is not supported for now");
6136       CGF.CGM.getDiags().Report(DiagID);
6137     } else {
6138       emitOMPAtomicCompareExpr(CGF, AO, X, E, D, CE, IsXLHSInRHSPart, Loc);
6139     }
6140     break;
6141   }
6142   case OMPC_if:
6143   case OMPC_final:
6144   case OMPC_num_threads:
6145   case OMPC_private:
6146   case OMPC_firstprivate:
6147   case OMPC_lastprivate:
6148   case OMPC_reduction:
6149   case OMPC_task_reduction:
6150   case OMPC_in_reduction:
6151   case OMPC_safelen:
6152   case OMPC_simdlen:
6153   case OMPC_sizes:
6154   case OMPC_full:
6155   case OMPC_partial:
6156   case OMPC_allocator:
6157   case OMPC_allocate:
6158   case OMPC_collapse:
6159   case OMPC_default:
6160   case OMPC_seq_cst:
6161   case OMPC_acq_rel:
6162   case OMPC_acquire:
6163   case OMPC_release:
6164   case OMPC_relaxed:
6165   case OMPC_shared:
6166   case OMPC_linear:
6167   case OMPC_aligned:
6168   case OMPC_copyin:
6169   case OMPC_copyprivate:
6170   case OMPC_flush:
6171   case OMPC_depobj:
6172   case OMPC_proc_bind:
6173   case OMPC_schedule:
6174   case OMPC_ordered:
6175   case OMPC_nowait:
6176   case OMPC_untied:
6177   case OMPC_threadprivate:
6178   case OMPC_depend:
6179   case OMPC_mergeable:
6180   case OMPC_device:
6181   case OMPC_threads:
6182   case OMPC_simd:
6183   case OMPC_map:
6184   case OMPC_num_teams:
6185   case OMPC_thread_limit:
6186   case OMPC_priority:
6187   case OMPC_grainsize:
6188   case OMPC_nogroup:
6189   case OMPC_num_tasks:
6190   case OMPC_hint:
6191   case OMPC_dist_schedule:
6192   case OMPC_defaultmap:
6193   case OMPC_uniform:
6194   case OMPC_to:
6195   case OMPC_from:
6196   case OMPC_use_device_ptr:
6197   case OMPC_use_device_addr:
6198   case OMPC_is_device_ptr:
6199   case OMPC_unified_address:
6200   case OMPC_unified_shared_memory:
6201   case OMPC_reverse_offload:
6202   case OMPC_dynamic_allocators:
6203   case OMPC_atomic_default_mem_order:
6204   case OMPC_device_type:
6205   case OMPC_match:
6206   case OMPC_nontemporal:
6207   case OMPC_order:
6208   case OMPC_destroy:
6209   case OMPC_detach:
6210   case OMPC_inclusive:
6211   case OMPC_exclusive:
6212   case OMPC_uses_allocators:
6213   case OMPC_affinity:
6214   case OMPC_init:
6215   case OMPC_inbranch:
6216   case OMPC_notinbranch:
6217   case OMPC_link:
6218   case OMPC_indirect:
6219   case OMPC_use:
6220   case OMPC_novariants:
6221   case OMPC_nocontext:
6222   case OMPC_filter:
6223   case OMPC_when:
6224   case OMPC_adjust_args:
6225   case OMPC_append_args:
6226   case OMPC_memory_order:
6227   case OMPC_bind:
6228   case OMPC_align:
6229     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6230   }
6231 }
6232 
6233 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6234   llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
6235   bool MemOrderingSpecified = false;
6236   if (S.getSingleClause<OMPSeqCstClause>()) {
6237     AO = llvm::AtomicOrdering::SequentiallyConsistent;
6238     MemOrderingSpecified = true;
6239   } else if (S.getSingleClause<OMPAcqRelClause>()) {
6240     AO = llvm::AtomicOrdering::AcquireRelease;
6241     MemOrderingSpecified = true;
6242   } else if (S.getSingleClause<OMPAcquireClause>()) {
6243     AO = llvm::AtomicOrdering::Acquire;
6244     MemOrderingSpecified = true;
6245   } else if (S.getSingleClause<OMPReleaseClause>()) {
6246     AO = llvm::AtomicOrdering::Release;
6247     MemOrderingSpecified = true;
6248   } else if (S.getSingleClause<OMPRelaxedClause>()) {
6249     AO = llvm::AtomicOrdering::Monotonic;
6250     MemOrderingSpecified = true;
6251   }
6252   llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6253   OpenMPClauseKind Kind = OMPC_unknown;
6254   for (const OMPClause *C : S.clauses()) {
6255     // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
6256     // if it is first).
6257     OpenMPClauseKind K = C->getClauseKind();
6258     if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6259         K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6260       continue;
6261     Kind = K;
6262     KindsEncountered.insert(K);
6263   }
6264   bool IsCompareCapture = false;
6265   if (KindsEncountered.contains(OMPC_compare) &&
6266       KindsEncountered.contains(OMPC_capture)) {
6267     IsCompareCapture = true;
6268     Kind = OMPC_compare;
6269   }
6270   if (!MemOrderingSpecified) {
6271     llvm::AtomicOrdering DefaultOrder =
6272         CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6273     if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6274         DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6275         (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6276          Kind == OMPC_capture)) {
6277       AO = DefaultOrder;
6278     } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6279       if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6280         AO = llvm::AtomicOrdering::Release;
6281       } else if (Kind == OMPC_read) {
6282         assert(Kind == OMPC_read && "Unexpected atomic kind.");
6283         AO = llvm::AtomicOrdering::Acquire;
6284       }
6285     }
6286   }
6287 
6288   LexicalScope Scope(*this, S.getSourceRange());
6289   EmitStopPoint(S.getAssociatedStmt());
6290   emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
6291                     S.getExpr(), S.getUpdateExpr(), S.getD(), S.getCondExpr(),
6292                     S.isXLHSInRHSPart(), IsCompareCapture, S.getBeginLoc());
6293 }
6294 
6295 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6296                                          const OMPExecutableDirective &S,
6297                                          const RegionCodeGenTy &CodeGen) {
6298   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6299   CodeGenModule &CGM = CGF.CGM;
6300 
6301   // On device emit this construct as inlined code.
6302   if (CGM.getLangOpts().OpenMPIsDevice) {
6303     OMPLexicalScope Scope(CGF, S, OMPD_target);
6304     CGM.getOpenMPRuntime().emitInlinedDirective(
6305         CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6306           CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6307         });
6308     return;
6309   }
6310 
6311   auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6312   llvm::Function *Fn = nullptr;
6313   llvm::Constant *FnID = nullptr;
6314 
6315   const Expr *IfCond = nullptr;
6316   // Check for the at most one if clause associated with the target region.
6317   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6318     if (C->getNameModifier() == OMPD_unknown ||
6319         C->getNameModifier() == OMPD_target) {
6320       IfCond = C->getCondition();
6321       break;
6322     }
6323   }
6324 
6325   // Check if we have any device clause associated with the directive.
6326   llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6327       nullptr, OMPC_DEVICE_unknown);
6328   if (auto *C = S.getSingleClause<OMPDeviceClause>())
6329     Device.setPointerAndInt(C->getDevice(), C->getModifier());
6330 
6331   // Check if we have an if clause whose conditional always evaluates to false
6332   // or if we do not have any targets specified. If so the target region is not
6333   // an offload entry point.
6334   bool IsOffloadEntry = true;
6335   if (IfCond) {
6336     bool Val;
6337     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
6338       IsOffloadEntry = false;
6339   }
6340   if (CGM.getLangOpts().OMPTargetTriples.empty())
6341     IsOffloadEntry = false;
6342 
6343   if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6344     unsigned DiagID = CGM.getDiags().getCustomDiagID(
6345         DiagnosticsEngine::Error,
6346         "No offloading entry generated while offloading is mandatory.");
6347     CGM.getDiags().Report(DiagID);
6348   }
6349 
6350   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6351   StringRef ParentName;
6352   // In case we have Ctors/Dtors we use the complete type variant to produce
6353   // the mangling of the device outlined kernel.
6354   if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
6355     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
6356   else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
6357     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
6358   else
6359     ParentName =
6360         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
6361 
6362   // Emit target region as a standalone region.
6363   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
6364                                                     IsOffloadEntry, CodeGen);
6365   OMPLexicalScope Scope(CGF, S, OMPD_task);
6366   auto &&SizeEmitter =
6367       [IsOffloadEntry](CodeGenFunction &CGF,
6368                        const OMPLoopDirective &D) -> llvm::Value * {
6369     if (IsOffloadEntry) {
6370       OMPLoopScope(CGF, D);
6371       // Emit calculation of the iterations count.
6372       llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
6373       NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
6374                                                 /*isSigned=*/false);
6375       return NumIterations;
6376     }
6377     return nullptr;
6378   };
6379   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
6380                                         SizeEmitter);
6381 }
6382 
6383 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6384                              PrePostActionTy &Action) {
6385   Action.Enter(CGF);
6386   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6387   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6388   CGF.EmitOMPPrivateClause(S, PrivateScope);
6389   (void)PrivateScope.Privatize();
6390   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6391     CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6392 
6393   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
6394   CGF.EnsureInsertPoint();
6395 }
6396 
6397 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6398                                                   StringRef ParentName,
6399                                                   const OMPTargetDirective &S) {
6400   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6401     emitTargetRegion(CGF, S, Action);
6402   };
6403   llvm::Function *Fn;
6404   llvm::Constant *Addr;
6405   // Emit target region as a standalone region.
6406   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6407       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6408   assert(Fn && Addr && "Target device function emission failed.");
6409 }
6410 
6411 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6412   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6413     emitTargetRegion(CGF, S, Action);
6414   };
6415   emitCommonOMPTargetDirective(*this, S, CodeGen);
6416 }
6417 
6418 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
6419                                         const OMPExecutableDirective &S,
6420                                         OpenMPDirectiveKind InnermostKind,
6421                                         const RegionCodeGenTy &CodeGen) {
6422   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
6423   llvm::Function *OutlinedFn =
6424       CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
6425           S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
6426 
6427   const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
6428   const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
6429   if (NT || TL) {
6430     const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
6431     const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
6432 
6433     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
6434                                                   S.getBeginLoc());
6435   }
6436 
6437   OMPTeamsScope Scope(CGF, S);
6438   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6439   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
6440   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
6441                                            CapturedVars);
6442 }
6443 
6444 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
6445   // Emit teams region as a standalone region.
6446   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6447     Action.Enter(CGF);
6448     OMPPrivateScope PrivateScope(CGF);
6449     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6450     CGF.EmitOMPPrivateClause(S, PrivateScope);
6451     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6452     (void)PrivateScope.Privatize();
6453     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
6454     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6455   };
6456   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6457   emitPostUpdateForReductionClause(*this, S,
6458                                    [](CodeGenFunction &) { return nullptr; });
6459 }
6460 
6461 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6462                                   const OMPTargetTeamsDirective &S) {
6463   auto *CS = S.getCapturedStmt(OMPD_teams);
6464   Action.Enter(CGF);
6465   // Emit teams region as a standalone region.
6466   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6467     Action.Enter(CGF);
6468     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6469     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6470     CGF.EmitOMPPrivateClause(S, PrivateScope);
6471     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6472     (void)PrivateScope.Privatize();
6473     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6474       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6475     CGF.EmitStmt(CS->getCapturedStmt());
6476     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6477   };
6478   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
6479   emitPostUpdateForReductionClause(CGF, S,
6480                                    [](CodeGenFunction &) { return nullptr; });
6481 }
6482 
6483 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6484     CodeGenModule &CGM, StringRef ParentName,
6485     const OMPTargetTeamsDirective &S) {
6486   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6487     emitTargetTeamsRegion(CGF, Action, S);
6488   };
6489   llvm::Function *Fn;
6490   llvm::Constant *Addr;
6491   // Emit target region as a standalone region.
6492   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6493       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6494   assert(Fn && Addr && "Target device function emission failed.");
6495 }
6496 
6497 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6498     const OMPTargetTeamsDirective &S) {
6499   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6500     emitTargetTeamsRegion(CGF, Action, S);
6501   };
6502   emitCommonOMPTargetDirective(*this, S, CodeGen);
6503 }
6504 
6505 static void
6506 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6507                                 const OMPTargetTeamsDistributeDirective &S) {
6508   Action.Enter(CGF);
6509   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6510     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6511   };
6512 
6513   // Emit teams region as a standalone region.
6514   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6515                                             PrePostActionTy &Action) {
6516     Action.Enter(CGF);
6517     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6518     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6519     (void)PrivateScope.Privatize();
6520     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6521                                                     CodeGenDistribute);
6522     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6523   };
6524   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
6525   emitPostUpdateForReductionClause(CGF, S,
6526                                    [](CodeGenFunction &) { return nullptr; });
6527 }
6528 
6529 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6530     CodeGenModule &CGM, StringRef ParentName,
6531     const OMPTargetTeamsDistributeDirective &S) {
6532   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6533     emitTargetTeamsDistributeRegion(CGF, Action, S);
6534   };
6535   llvm::Function *Fn;
6536   llvm::Constant *Addr;
6537   // Emit target region as a standalone region.
6538   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6539       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6540   assert(Fn && Addr && "Target device function emission failed.");
6541 }
6542 
6543 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6544     const OMPTargetTeamsDistributeDirective &S) {
6545   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6546     emitTargetTeamsDistributeRegion(CGF, Action, S);
6547   };
6548   emitCommonOMPTargetDirective(*this, S, CodeGen);
6549 }
6550 
6551 static void emitTargetTeamsDistributeSimdRegion(
6552     CodeGenFunction &CGF, PrePostActionTy &Action,
6553     const OMPTargetTeamsDistributeSimdDirective &S) {
6554   Action.Enter(CGF);
6555   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6556     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6557   };
6558 
6559   // Emit teams region as a standalone region.
6560   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6561                                             PrePostActionTy &Action) {
6562     Action.Enter(CGF);
6563     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6564     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6565     (void)PrivateScope.Privatize();
6566     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6567                                                     CodeGenDistribute);
6568     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6569   };
6570   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
6571   emitPostUpdateForReductionClause(CGF, S,
6572                                    [](CodeGenFunction &) { return nullptr; });
6573 }
6574 
6575 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6576     CodeGenModule &CGM, StringRef ParentName,
6577     const OMPTargetTeamsDistributeSimdDirective &S) {
6578   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6579     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6580   };
6581   llvm::Function *Fn;
6582   llvm::Constant *Addr;
6583   // Emit target region as a standalone region.
6584   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6585       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6586   assert(Fn && Addr && "Target device function emission failed.");
6587 }
6588 
6589 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6590     const OMPTargetTeamsDistributeSimdDirective &S) {
6591   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6592     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6593   };
6594   emitCommonOMPTargetDirective(*this, S, CodeGen);
6595 }
6596 
6597 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6598     const OMPTeamsDistributeDirective &S) {
6599 
6600   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6601     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6602   };
6603 
6604   // Emit teams region as a standalone region.
6605   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6606                                             PrePostActionTy &Action) {
6607     Action.Enter(CGF);
6608     OMPPrivateScope PrivateScope(CGF);
6609     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6610     (void)PrivateScope.Privatize();
6611     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6612                                                     CodeGenDistribute);
6613     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6614   };
6615   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6616   emitPostUpdateForReductionClause(*this, S,
6617                                    [](CodeGenFunction &) { return nullptr; });
6618 }
6619 
6620 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6621     const OMPTeamsDistributeSimdDirective &S) {
6622   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6623     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6624   };
6625 
6626   // Emit teams region as a standalone region.
6627   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6628                                             PrePostActionTy &Action) {
6629     Action.Enter(CGF);
6630     OMPPrivateScope PrivateScope(CGF);
6631     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6632     (void)PrivateScope.Privatize();
6633     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
6634                                                     CodeGenDistribute);
6635     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6636   };
6637   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
6638   emitPostUpdateForReductionClause(*this, S,
6639                                    [](CodeGenFunction &) { return nullptr; });
6640 }
6641 
6642 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
6643     const OMPTeamsDistributeParallelForDirective &S) {
6644   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6645     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6646                               S.getDistInc());
6647   };
6648 
6649   // Emit teams region as a standalone region.
6650   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6651                                             PrePostActionTy &Action) {
6652     Action.Enter(CGF);
6653     OMPPrivateScope PrivateScope(CGF);
6654     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6655     (void)PrivateScope.Privatize();
6656     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6657                                                     CodeGenDistribute);
6658     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6659   };
6660   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
6661   emitPostUpdateForReductionClause(*this, S,
6662                                    [](CodeGenFunction &) { return nullptr; });
6663 }
6664 
6665 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
6666     const OMPTeamsDistributeParallelForSimdDirective &S) {
6667   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6668     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6669                               S.getDistInc());
6670   };
6671 
6672   // Emit teams region as a standalone region.
6673   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6674                                             PrePostActionTy &Action) {
6675     Action.Enter(CGF);
6676     OMPPrivateScope PrivateScope(CGF);
6677     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6678     (void)PrivateScope.Privatize();
6679     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6680         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6681     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6682   };
6683   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
6684                               CodeGen);
6685   emitPostUpdateForReductionClause(*this, S,
6686                                    [](CodeGenFunction &) { return nullptr; });
6687 }
6688 
6689 void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
6690   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
6691   llvm::Value *Device = nullptr;
6692   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6693     Device = EmitScalarExpr(C->getDevice());
6694 
6695   llvm::Value *NumDependences = nullptr;
6696   llvm::Value *DependenceAddress = nullptr;
6697   if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
6698     OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
6699                                            DC->getModifier());
6700     Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
6701     std::pair<llvm::Value *, Address> DependencePair =
6702         CGM.getOpenMPRuntime().emitDependClause(*this, Dependencies,
6703                                                 DC->getBeginLoc());
6704     NumDependences = DependencePair.first;
6705     DependenceAddress = Builder.CreatePointerCast(
6706         DependencePair.second.getPointer(), CGM.Int8PtrTy);
6707   }
6708 
6709   assert(!(S.hasClausesOfKind<OMPNowaitClause>() &&
6710            !(S.getSingleClause<OMPInitClause>() ||
6711              S.getSingleClause<OMPDestroyClause>() ||
6712              S.getSingleClause<OMPUseClause>())) &&
6713          "OMPNowaitClause clause is used separately in OMPInteropDirective.");
6714 
6715   if (const auto *C = S.getSingleClause<OMPInitClause>()) {
6716     llvm::Value *InteropvarPtr =
6717         EmitLValue(C->getInteropVar()).getPointer(*this);
6718     llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown;
6719     if (C->getIsTarget()) {
6720       InteropType = llvm::omp::OMPInteropType::Target;
6721     } else {
6722       assert(C->getIsTargetSync() && "Expected interop-type target/targetsync");
6723       InteropType = llvm::omp::OMPInteropType::TargetSync;
6724     }
6725     OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device,
6726                                     NumDependences, DependenceAddress,
6727                                     S.hasClausesOfKind<OMPNowaitClause>());
6728   } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) {
6729     llvm::Value *InteropvarPtr =
6730         EmitLValue(C->getInteropVar()).getPointer(*this);
6731     OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
6732                                        NumDependences, DependenceAddress,
6733                                        S.hasClausesOfKind<OMPNowaitClause>());
6734   } else if (const auto *C = S.getSingleClause<OMPUseClause>()) {
6735     llvm::Value *InteropvarPtr =
6736         EmitLValue(C->getInteropVar()).getPointer(*this);
6737     OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
6738                                    NumDependences, DependenceAddress,
6739                                    S.hasClausesOfKind<OMPNowaitClause>());
6740   }
6741 }
6742 
6743 static void emitTargetTeamsDistributeParallelForRegion(
6744     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
6745     PrePostActionTy &Action) {
6746   Action.Enter(CGF);
6747   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6748     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6749                               S.getDistInc());
6750   };
6751 
6752   // Emit teams region as a standalone region.
6753   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6754                                                  PrePostActionTy &Action) {
6755     Action.Enter(CGF);
6756     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6757     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6758     (void)PrivateScope.Privatize();
6759     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6760         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6761     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6762   };
6763 
6764   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
6765                               CodeGenTeams);
6766   emitPostUpdateForReductionClause(CGF, S,
6767                                    [](CodeGenFunction &) { return nullptr; });
6768 }
6769 
6770 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
6771     CodeGenModule &CGM, StringRef ParentName,
6772     const OMPTargetTeamsDistributeParallelForDirective &S) {
6773   // Emit SPMD target teams distribute parallel for region as a standalone
6774   // region.
6775   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6776     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
6777   };
6778   llvm::Function *Fn;
6779   llvm::Constant *Addr;
6780   // Emit target region as a standalone region.
6781   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6782       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6783   assert(Fn && Addr && "Target device function emission failed.");
6784 }
6785 
6786 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
6787     const OMPTargetTeamsDistributeParallelForDirective &S) {
6788   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6789     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
6790   };
6791   emitCommonOMPTargetDirective(*this, S, CodeGen);
6792 }
6793 
6794 static void emitTargetTeamsDistributeParallelForSimdRegion(
6795     CodeGenFunction &CGF,
6796     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
6797     PrePostActionTy &Action) {
6798   Action.Enter(CGF);
6799   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6800     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6801                               S.getDistInc());
6802   };
6803 
6804   // Emit teams region as a standalone region.
6805   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6806                                                  PrePostActionTy &Action) {
6807     Action.Enter(CGF);
6808     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6809     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6810     (void)PrivateScope.Privatize();
6811     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6812         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6813     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6814   };
6815 
6816   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
6817                               CodeGenTeams);
6818   emitPostUpdateForReductionClause(CGF, S,
6819                                    [](CodeGenFunction &) { return nullptr; });
6820 }
6821 
6822 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
6823     CodeGenModule &CGM, StringRef ParentName,
6824     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
6825   // Emit SPMD target teams distribute parallel for simd region as a standalone
6826   // region.
6827   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6828     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
6829   };
6830   llvm::Function *Fn;
6831   llvm::Constant *Addr;
6832   // Emit target region as a standalone region.
6833   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6834       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6835   assert(Fn && Addr && "Target device function emission failed.");
6836 }
6837 
6838 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
6839     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
6840   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6841     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
6842   };
6843   emitCommonOMPTargetDirective(*this, S, CodeGen);
6844 }
6845 
6846 void CodeGenFunction::EmitOMPCancellationPointDirective(
6847     const OMPCancellationPointDirective &S) {
6848   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
6849                                                    S.getCancelRegion());
6850 }
6851 
6852 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
6853   const Expr *IfCond = nullptr;
6854   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6855     if (C->getNameModifier() == OMPD_unknown ||
6856         C->getNameModifier() == OMPD_cancel) {
6857       IfCond = C->getCondition();
6858       break;
6859     }
6860   }
6861   if (CGM.getLangOpts().OpenMPIRBuilder) {
6862     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
6863     // TODO: This check is necessary as we only generate `omp parallel` through
6864     // the OpenMPIRBuilder for now.
6865     if (S.getCancelRegion() == OMPD_parallel ||
6866         S.getCancelRegion() == OMPD_sections ||
6867         S.getCancelRegion() == OMPD_section) {
6868       llvm::Value *IfCondition = nullptr;
6869       if (IfCond)
6870         IfCondition = EmitScalarExpr(IfCond,
6871                                      /*IgnoreResultAssign=*/true);
6872       return Builder.restoreIP(
6873           OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
6874     }
6875   }
6876 
6877   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
6878                                         S.getCancelRegion());
6879 }
6880 
6881 CodeGenFunction::JumpDest
6882 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
6883   if (Kind == OMPD_parallel || Kind == OMPD_task ||
6884       Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
6885       Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
6886     return ReturnBlock;
6887   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
6888          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
6889          Kind == OMPD_distribute_parallel_for ||
6890          Kind == OMPD_target_parallel_for ||
6891          Kind == OMPD_teams_distribute_parallel_for ||
6892          Kind == OMPD_target_teams_distribute_parallel_for);
6893   return OMPCancelStack.getExitBlock();
6894 }
6895 
6896 void CodeGenFunction::EmitOMPUseDevicePtrClause(
6897     const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
6898     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6899   auto OrigVarIt = C.varlist_begin();
6900   auto InitIt = C.inits().begin();
6901   for (const Expr *PvtVarIt : C.private_copies()) {
6902     const auto *OrigVD =
6903         cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
6904     const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
6905     const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
6906 
6907     // In order to identify the right initializer we need to match the
6908     // declaration used by the mapping logic. In some cases we may get
6909     // OMPCapturedExprDecl that refers to the original declaration.
6910     const ValueDecl *MatchingVD = OrigVD;
6911     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6912       // OMPCapturedExprDecl are used to privative fields of the current
6913       // structure.
6914       const auto *ME = cast<MemberExpr>(OED->getInit());
6915       assert(isa<CXXThisExpr>(ME->getBase()) &&
6916              "Base should be the current struct!");
6917       MatchingVD = ME->getMemberDecl();
6918     }
6919 
6920     // If we don't have information about the current list item, move on to
6921     // the next one.
6922     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6923     if (InitAddrIt == CaptureDeviceAddrMap.end())
6924       continue;
6925 
6926     bool IsRegistered = PrivateScope.addPrivate(
6927         OrigVD, [this, OrigVD, InitAddrIt, InitVD, PvtVD]() {
6928           // Initialize the temporary initialization variable with the address
6929           // we get from the runtime library. We have to cast the source address
6930           // because it is always a void *. References are materialized in the
6931           // privatization scope, so the initialization here disregards the fact
6932           // the original variable is a reference.
6933           llvm::Type *Ty =
6934               ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
6935           Address InitAddr =
6936               Builder.CreateElementBitCast(InitAddrIt->second, Ty);
6937           setAddrOfLocalVar(InitVD, InitAddr);
6938 
6939           // Emit private declaration, it will be initialized by the value we
6940           // declaration we just added to the local declarations map.
6941           EmitDecl(*PvtVD);
6942 
6943           // The initialization variables reached its purpose in the emission
6944           // of the previous declaration, so we don't need it anymore.
6945           LocalDeclMap.erase(InitVD);
6946 
6947           // Return the address of the private variable.
6948           return GetAddrOfLocalVar(PvtVD);
6949         });
6950     assert(IsRegistered && "firstprivate var already registered as private");
6951     // Silence the warning about unused variable.
6952     (void)IsRegistered;
6953 
6954     ++OrigVarIt;
6955     ++InitIt;
6956   }
6957 }
6958 
6959 static const VarDecl *getBaseDecl(const Expr *Ref) {
6960   const Expr *Base = Ref->IgnoreParenImpCasts();
6961   while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
6962     Base = OASE->getBase()->IgnoreParenImpCasts();
6963   while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
6964     Base = ASE->getBase()->IgnoreParenImpCasts();
6965   return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
6966 }
6967 
6968 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
6969     const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
6970     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6971   llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
6972   for (const Expr *Ref : C.varlists()) {
6973     const VarDecl *OrigVD = getBaseDecl(Ref);
6974     if (!Processed.insert(OrigVD).second)
6975       continue;
6976     // In order to identify the right initializer we need to match the
6977     // declaration used by the mapping logic. In some cases we may get
6978     // OMPCapturedExprDecl that refers to the original declaration.
6979     const ValueDecl *MatchingVD = OrigVD;
6980     if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6981       // OMPCapturedExprDecl are used to privative fields of the current
6982       // structure.
6983       const auto *ME = cast<MemberExpr>(OED->getInit());
6984       assert(isa<CXXThisExpr>(ME->getBase()) &&
6985              "Base should be the current struct!");
6986       MatchingVD = ME->getMemberDecl();
6987     }
6988 
6989     // If we don't have information about the current list item, move on to
6990     // the next one.
6991     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6992     if (InitAddrIt == CaptureDeviceAddrMap.end())
6993       continue;
6994 
6995     Address PrivAddr = InitAddrIt->getSecond();
6996     // For declrefs and variable length array need to load the pointer for
6997     // correct mapping, since the pointer to the data was passed to the runtime.
6998     if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
6999         MatchingVD->getType()->isArrayType())
7000       PrivAddr =
7001           EmitLoadOfPointer(PrivAddr, getContext()
7002                                           .getPointerType(OrigVD->getType())
7003                                           ->castAs<PointerType>());
7004     llvm::Type *RealElTy =
7005         ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7006     llvm::Type *RealTy = RealElTy->getPointerTo();
7007     PrivAddr =
7008         Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy, RealElTy);
7009 
7010     (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
7011   }
7012 }
7013 
7014 // Generate the instructions for '#pragma omp target data' directive.
7015 void CodeGenFunction::EmitOMPTargetDataDirective(
7016     const OMPTargetDataDirective &S) {
7017   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7018                                        /*SeparateBeginEndCalls=*/true);
7019 
7020   // Create a pre/post action to signal the privatization of the device pointer.
7021   // This action can be replaced by the OpenMP runtime code generation to
7022   // deactivate privatization.
7023   bool PrivatizeDevicePointers = false;
7024   class DevicePointerPrivActionTy : public PrePostActionTy {
7025     bool &PrivatizeDevicePointers;
7026 
7027   public:
7028     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7029         : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7030     void Enter(CodeGenFunction &CGF) override {
7031       PrivatizeDevicePointers = true;
7032     }
7033   };
7034   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7035 
7036   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
7037                        CodeGenFunction &CGF, PrePostActionTy &Action) {
7038     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7039       CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
7040     };
7041 
7042     // Codegen that selects whether to generate the privatization code or not.
7043     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
7044                           &InnermostCodeGen](CodeGenFunction &CGF,
7045                                              PrePostActionTy &Action) {
7046       RegionCodeGenTy RCG(InnermostCodeGen);
7047       PrivatizeDevicePointers = false;
7048 
7049       // Call the pre-action to change the status of PrivatizeDevicePointers if
7050       // needed.
7051       Action.Enter(CGF);
7052 
7053       if (PrivatizeDevicePointers) {
7054         OMPPrivateScope PrivateScope(CGF);
7055         // Emit all instances of the use_device_ptr clause.
7056         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7057           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
7058                                         Info.CaptureDeviceAddrMap);
7059         for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7060           CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
7061                                          Info.CaptureDeviceAddrMap);
7062         (void)PrivateScope.Privatize();
7063         RCG(CGF);
7064       } else {
7065         OMPLexicalScope Scope(CGF, S, OMPD_unknown);
7066         RCG(CGF);
7067       }
7068     };
7069 
7070     // Forward the provided action to the privatization codegen.
7071     RegionCodeGenTy PrivRCG(PrivCodeGen);
7072     PrivRCG.setAction(Action);
7073 
7074     // Notwithstanding the body of the region is emitted as inlined directive,
7075     // we don't use an inline scope as changes in the references inside the
7076     // region are expected to be visible outside, so we do not privative them.
7077     OMPLexicalScope Scope(CGF, S);
7078     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
7079                                                     PrivRCG);
7080   };
7081 
7082   RegionCodeGenTy RCG(CodeGen);
7083 
7084   // If we don't have target devices, don't bother emitting the data mapping
7085   // code.
7086   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7087     RCG(*this);
7088     return;
7089   }
7090 
7091   // Check if we have any if clause associated with the directive.
7092   const Expr *IfCond = nullptr;
7093   if (const auto *C = S.getSingleClause<OMPIfClause>())
7094     IfCond = C->getCondition();
7095 
7096   // Check if we have any device clause associated with the directive.
7097   const Expr *Device = nullptr;
7098   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7099     Device = C->getDevice();
7100 
7101   // Set the action to signal privatization of device pointers.
7102   RCG.setAction(PrivAction);
7103 
7104   // Emit region code.
7105   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
7106                                              Info);
7107 }
7108 
7109 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7110     const OMPTargetEnterDataDirective &S) {
7111   // If we don't have target devices, don't bother emitting the data mapping
7112   // code.
7113   if (CGM.getLangOpts().OMPTargetTriples.empty())
7114     return;
7115 
7116   // Check if we have any if clause associated with the directive.
7117   const Expr *IfCond = nullptr;
7118   if (const auto *C = S.getSingleClause<OMPIfClause>())
7119     IfCond = C->getCondition();
7120 
7121   // Check if we have any device clause associated with the directive.
7122   const Expr *Device = nullptr;
7123   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7124     Device = C->getDevice();
7125 
7126   OMPLexicalScope Scope(*this, S, OMPD_task);
7127   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7128 }
7129 
7130 void CodeGenFunction::EmitOMPTargetExitDataDirective(
7131     const OMPTargetExitDataDirective &S) {
7132   // If we don't have target devices, don't bother emitting the data mapping
7133   // code.
7134   if (CGM.getLangOpts().OMPTargetTriples.empty())
7135     return;
7136 
7137   // Check if we have any if clause associated with the directive.
7138   const Expr *IfCond = nullptr;
7139   if (const auto *C = S.getSingleClause<OMPIfClause>())
7140     IfCond = C->getCondition();
7141 
7142   // Check if we have any device clause associated with the directive.
7143   const Expr *Device = nullptr;
7144   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7145     Device = C->getDevice();
7146 
7147   OMPLexicalScope Scope(*this, S, OMPD_task);
7148   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7149 }
7150 
7151 static void emitTargetParallelRegion(CodeGenFunction &CGF,
7152                                      const OMPTargetParallelDirective &S,
7153                                      PrePostActionTy &Action) {
7154   // Get the captured statement associated with the 'parallel' region.
7155   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
7156   Action.Enter(CGF);
7157   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7158     Action.Enter(CGF);
7159     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7160     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
7161     CGF.EmitOMPPrivateClause(S, PrivateScope);
7162     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7163     (void)PrivateScope.Privatize();
7164     if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
7165       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
7166     // TODO: Add support for clauses.
7167     CGF.EmitStmt(CS->getCapturedStmt());
7168     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
7169   };
7170   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
7171                                  emitEmptyBoundParameters);
7172   emitPostUpdateForReductionClause(CGF, S,
7173                                    [](CodeGenFunction &) { return nullptr; });
7174 }
7175 
7176 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7177     CodeGenModule &CGM, StringRef ParentName,
7178     const OMPTargetParallelDirective &S) {
7179   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7180     emitTargetParallelRegion(CGF, S, Action);
7181   };
7182   llvm::Function *Fn;
7183   llvm::Constant *Addr;
7184   // Emit target region as a standalone region.
7185   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7186       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7187   assert(Fn && Addr && "Target device function emission failed.");
7188 }
7189 
7190 void CodeGenFunction::EmitOMPTargetParallelDirective(
7191     const OMPTargetParallelDirective &S) {
7192   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7193     emitTargetParallelRegion(CGF, S, Action);
7194   };
7195   emitCommonOMPTargetDirective(*this, S, CodeGen);
7196 }
7197 
7198 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7199                                         const OMPTargetParallelForDirective &S,
7200                                         PrePostActionTy &Action) {
7201   Action.Enter(CGF);
7202   // Emit directive as a combined directive that consists of two implicit
7203   // directives: 'parallel' with 'for' directive.
7204   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7205     Action.Enter(CGF);
7206     CodeGenFunction::OMPCancelStackRAII CancelRegion(
7207         CGF, OMPD_target_parallel_for, S.hasCancel());
7208     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7209                                emitDispatchForLoopBounds);
7210   };
7211   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
7212                                  emitEmptyBoundParameters);
7213 }
7214 
7215 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7216     CodeGenModule &CGM, StringRef ParentName,
7217     const OMPTargetParallelForDirective &S) {
7218   // Emit SPMD target parallel for region as a standalone region.
7219   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7220     emitTargetParallelForRegion(CGF, S, Action);
7221   };
7222   llvm::Function *Fn;
7223   llvm::Constant *Addr;
7224   // Emit target region as a standalone region.
7225   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7226       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7227   assert(Fn && Addr && "Target device function emission failed.");
7228 }
7229 
7230 void CodeGenFunction::EmitOMPTargetParallelForDirective(
7231     const OMPTargetParallelForDirective &S) {
7232   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7233     emitTargetParallelForRegion(CGF, S, Action);
7234   };
7235   emitCommonOMPTargetDirective(*this, S, CodeGen);
7236 }
7237 
7238 static void
7239 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7240                                 const OMPTargetParallelForSimdDirective &S,
7241                                 PrePostActionTy &Action) {
7242   Action.Enter(CGF);
7243   // Emit directive as a combined directive that consists of two implicit
7244   // directives: 'parallel' with 'for' directive.
7245   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7246     Action.Enter(CGF);
7247     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7248                                emitDispatchForLoopBounds);
7249   };
7250   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
7251                                  emitEmptyBoundParameters);
7252 }
7253 
7254 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7255     CodeGenModule &CGM, StringRef ParentName,
7256     const OMPTargetParallelForSimdDirective &S) {
7257   // Emit SPMD target parallel for region as a standalone region.
7258   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7259     emitTargetParallelForSimdRegion(CGF, S, Action);
7260   };
7261   llvm::Function *Fn;
7262   llvm::Constant *Addr;
7263   // Emit target region as a standalone region.
7264   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7265       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7266   assert(Fn && Addr && "Target device function emission failed.");
7267 }
7268 
7269 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7270     const OMPTargetParallelForSimdDirective &S) {
7271   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7272     emitTargetParallelForSimdRegion(CGF, S, Action);
7273   };
7274   emitCommonOMPTargetDirective(*this, S, CodeGen);
7275 }
7276 
7277 /// Emit a helper variable and return corresponding lvalue.
7278 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7279                      const ImplicitParamDecl *PVD,
7280                      CodeGenFunction::OMPPrivateScope &Privates) {
7281   const auto *VDecl = cast<VarDecl>(Helper->getDecl());
7282   Privates.addPrivate(VDecl,
7283                       [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
7284 }
7285 
7286 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7287   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7288   // Emit outlined function for task construct.
7289   const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
7290   Address CapturedStruct = Address::invalid();
7291   {
7292     OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7293     CapturedStruct = GenerateCapturedStmtArgument(*CS);
7294   }
7295   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
7296   const Expr *IfCond = nullptr;
7297   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7298     if (C->getNameModifier() == OMPD_unknown ||
7299         C->getNameModifier() == OMPD_taskloop) {
7300       IfCond = C->getCondition();
7301       break;
7302     }
7303   }
7304 
7305   OMPTaskDataTy Data;
7306   // Check if taskloop must be emitted without taskgroup.
7307   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7308   // TODO: Check if we should emit tied or untied task.
7309   Data.Tied = true;
7310   // Set scheduling for taskloop
7311   if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7312     // grainsize clause
7313     Data.Schedule.setInt(/*IntVal=*/false);
7314     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
7315   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7316     // num_tasks clause
7317     Data.Schedule.setInt(/*IntVal=*/true);
7318     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
7319   }
7320 
7321   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7322     // if (PreCond) {
7323     //   for (IV in 0..LastIteration) BODY;
7324     //   <Final counter/linear vars updates>;
7325     // }
7326     //
7327 
7328     // Emit: if (PreCond) - begin.
7329     // If the condition constant folds and can be elided, avoid emitting the
7330     // whole loop.
7331     bool CondConstant;
7332     llvm::BasicBlock *ContBlock = nullptr;
7333     OMPLoopScope PreInitScope(CGF, S);
7334     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
7335       if (!CondConstant)
7336         return;
7337     } else {
7338       llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
7339       ContBlock = CGF.createBasicBlock("taskloop.if.end");
7340       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
7341                   CGF.getProfileCount(&S));
7342       CGF.EmitBlock(ThenBlock);
7343       CGF.incrementProfileCounter(&S);
7344     }
7345 
7346     (void)CGF.EmitOMPLinearClauseInit(S);
7347 
7348     OMPPrivateScope LoopScope(CGF);
7349     // Emit helper vars inits.
7350     enum { LowerBound = 5, UpperBound, Stride, LastIter };
7351     auto *I = CS->getCapturedDecl()->param_begin();
7352     auto *LBP = std::next(I, LowerBound);
7353     auto *UBP = std::next(I, UpperBound);
7354     auto *STP = std::next(I, Stride);
7355     auto *LIP = std::next(I, LastIter);
7356     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
7357              LoopScope);
7358     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
7359              LoopScope);
7360     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
7361     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
7362              LoopScope);
7363     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7364     CGF.EmitOMPLinearClause(S, LoopScope);
7365     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
7366     (void)LoopScope.Privatize();
7367     // Emit the loop iteration variable.
7368     const Expr *IVExpr = S.getIterationVariable();
7369     const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
7370     CGF.EmitVarDecl(*IVDecl);
7371     CGF.EmitIgnoredExpr(S.getInit());
7372 
7373     // Emit the iterations count variable.
7374     // If it is not a variable, Sema decided to calculate iterations count on
7375     // each iteration (e.g., it is foldable into a constant).
7376     if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
7377       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
7378       // Emit calculation of the iterations count.
7379       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
7380     }
7381 
7382     {
7383       OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7384       emitCommonSimdLoop(
7385           CGF, S,
7386           [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7387             if (isOpenMPSimdDirective(S.getDirectiveKind()))
7388               CGF.EmitOMPSimdInit(S);
7389           },
7390           [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
7391             CGF.EmitOMPInnerLoop(
7392                 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
7393                 [&S](CodeGenFunction &CGF) {
7394                   emitOMPLoopBodyWithStopPoint(CGF, S,
7395                                                CodeGenFunction::JumpDest());
7396                 },
7397                 [](CodeGenFunction &) {});
7398           });
7399     }
7400     // Emit: if (PreCond) - end.
7401     if (ContBlock) {
7402       CGF.EmitBranch(ContBlock);
7403       CGF.EmitBlock(ContBlock, true);
7404     }
7405     // Emit final copy of the lastprivate variables if IsLastIter != 0.
7406     if (HasLastprivateClause) {
7407       CGF.EmitOMPLastprivateClauseFinal(
7408           S, isOpenMPSimdDirective(S.getDirectiveKind()),
7409           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
7410               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7411               (*LIP)->getType(), S.getBeginLoc())));
7412     }
7413     CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
7414       return CGF.Builder.CreateIsNotNull(
7415           CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7416                                (*LIP)->getType(), S.getBeginLoc()));
7417     });
7418   };
7419   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
7420                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
7421                             const OMPTaskDataTy &Data) {
7422     auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
7423                       &Data](CodeGenFunction &CGF, PrePostActionTy &) {
7424       OMPLoopScope PreInitScope(CGF, S);
7425       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
7426                                                   OutlinedFn, SharedsTy,
7427                                                   CapturedStruct, IfCond, Data);
7428     };
7429     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
7430                                                     CodeGen);
7431   };
7432   if (Data.Nogroup) {
7433     EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
7434   } else {
7435     CGM.getOpenMPRuntime().emitTaskgroupRegion(
7436         *this,
7437         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
7438                                         PrePostActionTy &Action) {
7439           Action.Enter(CGF);
7440           CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
7441                                         Data);
7442         },
7443         S.getBeginLoc());
7444   }
7445 }
7446 
7447 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
7448   auto LPCRegion =
7449       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7450   EmitOMPTaskLoopBasedDirective(S);
7451 }
7452 
7453 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
7454     const OMPTaskLoopSimdDirective &S) {
7455   auto LPCRegion =
7456       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7457   OMPLexicalScope Scope(*this, S);
7458   EmitOMPTaskLoopBasedDirective(S);
7459 }
7460 
7461 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
7462     const OMPMasterTaskLoopDirective &S) {
7463   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7464     Action.Enter(CGF);
7465     EmitOMPTaskLoopBasedDirective(S);
7466   };
7467   auto LPCRegion =
7468       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7469   OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
7470   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7471 }
7472 
7473 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
7474     const OMPMasterTaskLoopSimdDirective &S) {
7475   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7476     Action.Enter(CGF);
7477     EmitOMPTaskLoopBasedDirective(S);
7478   };
7479   auto LPCRegion =
7480       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7481   OMPLexicalScope Scope(*this, S);
7482   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7483 }
7484 
7485 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
7486     const OMPParallelMasterTaskLoopDirective &S) {
7487   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7488     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7489                                   PrePostActionTy &Action) {
7490       Action.Enter(CGF);
7491       CGF.EmitOMPTaskLoopBasedDirective(S);
7492     };
7493     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7494     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7495                                             S.getBeginLoc());
7496   };
7497   auto LPCRegion =
7498       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7499   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
7500                                  emitEmptyBoundParameters);
7501 }
7502 
7503 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
7504     const OMPParallelMasterTaskLoopSimdDirective &S) {
7505   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7506     auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7507                                   PrePostActionTy &Action) {
7508       Action.Enter(CGF);
7509       CGF.EmitOMPTaskLoopBasedDirective(S);
7510     };
7511     OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7512     CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7513                                             S.getBeginLoc());
7514   };
7515   auto LPCRegion =
7516       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7517   emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
7518                                  emitEmptyBoundParameters);
7519 }
7520 
7521 // Generate the instructions for '#pragma omp target update' directive.
7522 void CodeGenFunction::EmitOMPTargetUpdateDirective(
7523     const OMPTargetUpdateDirective &S) {
7524   // If we don't have target devices, don't bother emitting the data mapping
7525   // code.
7526   if (CGM.getLangOpts().OMPTargetTriples.empty())
7527     return;
7528 
7529   // Check if we have any if clause associated with the directive.
7530   const Expr *IfCond = nullptr;
7531   if (const auto *C = S.getSingleClause<OMPIfClause>())
7532     IfCond = C->getCondition();
7533 
7534   // Check if we have any device clause associated with the directive.
7535   const Expr *Device = nullptr;
7536   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7537     Device = C->getDevice();
7538 
7539   OMPLexicalScope Scope(*this, S, OMPD_task);
7540   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7541 }
7542 
7543 void CodeGenFunction::EmitOMPGenericLoopDirective(
7544     const OMPGenericLoopDirective &S) {
7545   // Unimplemented, just inline the underlying statement for now.
7546   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7547     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
7548   };
7549   OMPLexicalScope Scope(*this, S, OMPD_unknown);
7550   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
7551 }
7552 
7553 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
7554     const OMPExecutableDirective &D) {
7555   if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
7556     EmitOMPScanDirective(*SD);
7557     return;
7558   }
7559   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
7560     return;
7561   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
7562     OMPPrivateScope GlobalsScope(CGF);
7563     if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
7564       // Capture global firstprivates to avoid crash.
7565       for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
7566         for (const Expr *Ref : C->varlists()) {
7567           const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
7568           if (!DRE)
7569             continue;
7570           const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
7571           if (!VD || VD->hasLocalStorage())
7572             continue;
7573           if (!CGF.LocalDeclMap.count(VD)) {
7574             LValue GlobLVal = CGF.EmitLValue(Ref);
7575             GlobalsScope.addPrivate(
7576                 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
7577           }
7578         }
7579       }
7580     }
7581     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
7582       (void)GlobalsScope.Privatize();
7583       ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
7584       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
7585     } else {
7586       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
7587         for (const Expr *E : LD->counters()) {
7588           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
7589           if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
7590             LValue GlobLVal = CGF.EmitLValue(E);
7591             GlobalsScope.addPrivate(
7592                 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
7593           }
7594           if (isa<OMPCapturedExprDecl>(VD)) {
7595             // Emit only those that were not explicitly referenced in clauses.
7596             if (!CGF.LocalDeclMap.count(VD))
7597               CGF.EmitVarDecl(*VD);
7598           }
7599         }
7600         for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
7601           if (!C->getNumForLoops())
7602             continue;
7603           for (unsigned I = LD->getLoopsNumber(),
7604                         E = C->getLoopNumIterations().size();
7605                I < E; ++I) {
7606             if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
7607                     cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
7608               // Emit only those that were not explicitly referenced in clauses.
7609               if (!CGF.LocalDeclMap.count(VD))
7610                 CGF.EmitVarDecl(*VD);
7611             }
7612           }
7613         }
7614       }
7615       (void)GlobalsScope.Privatize();
7616       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
7617     }
7618   };
7619   if (D.getDirectiveKind() == OMPD_atomic ||
7620       D.getDirectiveKind() == OMPD_critical ||
7621       D.getDirectiveKind() == OMPD_section ||
7622       D.getDirectiveKind() == OMPD_master ||
7623       D.getDirectiveKind() == OMPD_masked) {
7624     EmitStmt(D.getAssociatedStmt());
7625   } else {
7626     auto LPCRegion =
7627         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
7628     OMPSimdLexicalScope Scope(*this, D);
7629     CGM.getOpenMPRuntime().emitInlinedDirective(
7630         *this,
7631         isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
7632                                                     : D.getDirectiveKind(),
7633         CodeGen);
7634   }
7635   // Check for outer lastprivate conditional update.
7636   checkForLastprivateConditionalUpdate(*this, D);
7637 }
7638