1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(
57       CodeGenFunction &CGF, const OMPExecutableDirective &S,
58       const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
59       const bool EmitPreInitStmt = true)
60       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
61         InlinedShareds(CGF) {
62     if (EmitPreInitStmt)
63       emitPreInitStmt(CGF, S);
64     if (!CapturedRegion.hasValue())
65       return;
66     assert(S.hasAssociatedStmt() &&
67            "Expected associated statement for inlined directive.");
68     const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
69     for (auto &C : CS->captures()) {
70       if (C.capturesVariable() || C.capturesVariableByCopy()) {
71         auto *VD = C.getCapturedVar();
72         assert(VD == VD->getCanonicalDecl() &&
73                "Canonical decl must be captured.");
74         DeclRefExpr DRE(
75             const_cast<VarDecl *>(VD),
76             isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
77                                        InlinedShareds.isGlobalVarCaptured(VD)),
78             VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
79         InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
80           return CGF.EmitLValue(&DRE).getAddress();
81         });
82       }
83     }
84     (void)InlinedShareds.Privatize();
85   }
86 };
87 
88 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
89 /// for captured expressions.
90 class OMPParallelScope final : public OMPLexicalScope {
91   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
92     OpenMPDirectiveKind Kind = S.getDirectiveKind();
93     return !(isOpenMPTargetExecutionDirective(Kind) ||
94              isOpenMPLoopBoundSharingDirective(Kind)) &&
95            isOpenMPParallelDirective(Kind);
96   }
97 
98 public:
99   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
100       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
101                         EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
116                         EmitPreInitStmt(S)) {}
117 };
118 
119 /// Private scope for OpenMP loop-based directives, that supports capturing
120 /// of used expression from loop statement.
121 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
122   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
123     CodeGenFunction::OMPMapVars PreCondVars;
124     for (auto *E : S.counters()) {
125       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
126       (void)PreCondVars.setVarAddr(
127           CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
128     }
129     (void)PreCondVars.apply(CGF);
130     if (auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
131       for (const auto *I : PreInits->decls())
132         CGF.EmitVarDecl(cast<VarDecl>(*I));
133     }
134     PreCondVars.restore(CGF);
135   }
136 
137 public:
138   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
139       : CodeGenFunction::RunCleanupsScope(CGF) {
140     emitPreInitStmt(CGF, S);
141   }
142 };
143 
144 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
145   CodeGenFunction::OMPPrivateScope InlinedShareds;
146 
147   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
148     return CGF.LambdaCaptureFields.lookup(VD) ||
149            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
150            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
151             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
152   }
153 
154 public:
155   OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
156       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
157         InlinedShareds(CGF) {
158     for (const auto *C : S.clauses()) {
159       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
160         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
161           for (const auto *I : PreInit->decls()) {
162             if (!I->hasAttr<OMPCaptureNoInitAttr>())
163               CGF.EmitVarDecl(cast<VarDecl>(*I));
164             else {
165               CodeGenFunction::AutoVarEmission Emission =
166                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
167               CGF.EmitAutoVarCleanups(Emission);
168             }
169           }
170         }
171       } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
172         for (const Expr *E : UDP->varlists()) {
173           const Decl *D = cast<DeclRefExpr>(E)->getDecl();
174           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
175             CGF.EmitVarDecl(*OED);
176         }
177       }
178     }
179     if (!isOpenMPSimdDirective(S.getDirectiveKind()))
180       CGF.EmitOMPPrivateClause(S, InlinedShareds);
181     if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
182       if (const Expr *E = TG->getReductionRef())
183         CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
184     }
185     const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
186     while (CS) {
187       for (auto &C : CS->captures()) {
188         if (C.capturesVariable() || C.capturesVariableByCopy()) {
189           auto *VD = C.getCapturedVar();
190           assert(VD == VD->getCanonicalDecl() &&
191                  "Canonical decl must be captured.");
192           DeclRefExpr DRE(const_cast<VarDecl *>(VD),
193                           isCapturedVar(CGF, VD) ||
194                               (CGF.CapturedStmtInfo &&
195                                InlinedShareds.isGlobalVarCaptured(VD)),
196                           VD->getType().getNonReferenceType(), VK_LValue,
197                           C.getLocation());
198           InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
199             return CGF.EmitLValue(&DRE).getAddress();
200           });
201         }
202       }
203       CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
204     }
205     (void)InlinedShareds.Privatize();
206   }
207 };
208 
209 } // namespace
210 
211 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
212                                          const OMPExecutableDirective &S,
213                                          const RegionCodeGenTy &CodeGen);
214 
215 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
216   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
217     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
218       OrigVD = OrigVD->getCanonicalDecl();
219       bool IsCaptured =
220           LambdaCaptureFields.lookup(OrigVD) ||
221           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
222           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
223       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
224                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
225       return EmitLValue(&DRE);
226     }
227   }
228   return EmitLValue(E);
229 }
230 
231 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
232   auto &C = getContext();
233   llvm::Value *Size = nullptr;
234   auto SizeInChars = C.getTypeSizeInChars(Ty);
235   if (SizeInChars.isZero()) {
236     // getTypeSizeInChars() returns 0 for a VLA.
237     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
238       auto VlaSize = getVLASize(VAT);
239       Ty = VlaSize.Type;
240       Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
241                   : VlaSize.NumElts;
242     }
243     SizeInChars = C.getTypeSizeInChars(Ty);
244     if (SizeInChars.isZero())
245       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
246     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
247   } else
248     Size = CGM.getSize(SizeInChars);
249   return Size;
250 }
251 
252 void CodeGenFunction::GenerateOpenMPCapturedVars(
253     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
254   const RecordDecl *RD = S.getCapturedRecordDecl();
255   auto CurField = RD->field_begin();
256   auto CurCap = S.captures().begin();
257   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
258                                                  E = S.capture_init_end();
259        I != E; ++I, ++CurField, ++CurCap) {
260     if (CurField->hasCapturedVLAType()) {
261       auto VAT = CurField->getCapturedVLAType();
262       auto *Val = VLASizeMap[VAT->getSizeExpr()];
263       CapturedVars.push_back(Val);
264     } else if (CurCap->capturesThis())
265       CapturedVars.push_back(CXXThisValue);
266     else if (CurCap->capturesVariableByCopy()) {
267       llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
268 
269       // If the field is not a pointer, we need to save the actual value
270       // and load it as a void pointer.
271       if (!CurField->getType()->isAnyPointerType()) {
272         auto &Ctx = getContext();
273         auto DstAddr = CreateMemTemp(
274             Ctx.getUIntPtrType(),
275             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
276         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
277 
278         auto *SrcAddrVal = EmitScalarConversion(
279             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
280             Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
281         LValue SrcLV =
282             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
283 
284         // Store the value using the source type pointer.
285         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
286 
287         // Load the value using the destination type pointer.
288         CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
289       }
290       CapturedVars.push_back(CV);
291     } else {
292       assert(CurCap->capturesVariable() && "Expected capture by reference.");
293       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
294     }
295   }
296 }
297 
298 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
299                                     QualType DstType, StringRef Name,
300                                     LValue AddrLV,
301                                     bool isReferenceType = false) {
302   ASTContext &Ctx = CGF.getContext();
303 
304   auto *CastedPtr = CGF.EmitScalarConversion(AddrLV.getAddress().getPointer(),
305                                              Ctx.getUIntPtrType(),
306                                              Ctx.getPointerType(DstType), Loc);
307   auto TmpAddr =
308       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
309           .getAddress();
310 
311   // If we are dealing with references we need to return the address of the
312   // reference instead of the reference of the value.
313   if (isReferenceType) {
314     QualType RefType = Ctx.getLValueReferenceType(DstType);
315     auto *RefVal = TmpAddr.getPointer();
316     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
317     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
318     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
319   }
320 
321   return TmpAddr;
322 }
323 
324 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
325   if (T->isLValueReferenceType()) {
326     return C.getLValueReferenceType(
327         getCanonicalParamType(C, T.getNonReferenceType()),
328         /*SpelledAsLValue=*/false);
329   }
330   if (T->isPointerType())
331     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
332   if (auto *A = T->getAsArrayTypeUnsafe()) {
333     if (auto *VLA = dyn_cast<VariableArrayType>(A))
334       return getCanonicalParamType(C, VLA->getElementType());
335     else if (!A->isVariablyModifiedType())
336       return C.getCanonicalType(T);
337   }
338   return C.getCanonicalParamType(T);
339 }
340 
341 namespace {
342   /// Contains required data for proper outlined function codegen.
343   struct FunctionOptions {
344     /// Captured statement for which the function is generated.
345     const CapturedStmt *S = nullptr;
346     /// true if cast to/from  UIntPtr is required for variables captured by
347     /// value.
348     const bool UIntPtrCastRequired = true;
349     /// true if only casted arguments must be registered as local args or VLA
350     /// sizes.
351     const bool RegisterCastedArgsOnly = false;
352     /// Name of the generated function.
353     const StringRef FunctionName;
354     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
355                              bool RegisterCastedArgsOnly,
356                              StringRef FunctionName)
357         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
358           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
359           FunctionName(FunctionName) {}
360   };
361 }
362 
363 static llvm::Function *emitOutlinedFunctionPrologue(
364     CodeGenFunction &CGF, FunctionArgList &Args,
365     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
366         &LocalAddrs,
367     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
368         &VLASizes,
369     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
370   const CapturedDecl *CD = FO.S->getCapturedDecl();
371   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
372   assert(CD->hasBody() && "missing CapturedDecl body");
373 
374   CXXThisValue = nullptr;
375   // Build the argument list.
376   CodeGenModule &CGM = CGF.CGM;
377   ASTContext &Ctx = CGM.getContext();
378   FunctionArgList TargetArgs;
379   Args.append(CD->param_begin(),
380               std::next(CD->param_begin(), CD->getContextParamPosition()));
381   TargetArgs.append(
382       CD->param_begin(),
383       std::next(CD->param_begin(), CD->getContextParamPosition()));
384   auto I = FO.S->captures().begin();
385   FunctionDecl *DebugFunctionDecl = nullptr;
386   if (!FO.UIntPtrCastRequired) {
387     FunctionProtoType::ExtProtoInfo EPI;
388     DebugFunctionDecl = FunctionDecl::Create(
389         Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
390         SourceLocation(), DeclarationName(), Ctx.VoidTy,
391         Ctx.getTrivialTypeSourceInfo(
392             Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
393         SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
394   }
395   for (auto *FD : RD->fields()) {
396     QualType ArgType = FD->getType();
397     IdentifierInfo *II = nullptr;
398     VarDecl *CapVar = nullptr;
399 
400     // If this is a capture by copy and the type is not a pointer, the outlined
401     // function argument type should be uintptr and the value properly casted to
402     // uintptr. This is necessary given that the runtime library is only able to
403     // deal with pointers. We can pass in the same way the VLA type sizes to the
404     // outlined function.
405     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
406         I->capturesVariableArrayType()) {
407       if (FO.UIntPtrCastRequired)
408         ArgType = Ctx.getUIntPtrType();
409     }
410 
411     if (I->capturesVariable() || I->capturesVariableByCopy()) {
412       CapVar = I->getCapturedVar();
413       II = CapVar->getIdentifier();
414     } else if (I->capturesThis())
415       II = &Ctx.Idents.get("this");
416     else {
417       assert(I->capturesVariableArrayType());
418       II = &Ctx.Idents.get("vla");
419     }
420     if (ArgType->isVariablyModifiedType())
421       ArgType = getCanonicalParamType(Ctx, ArgType);
422     VarDecl *Arg;
423     if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
424       Arg = ParmVarDecl::Create(
425           Ctx, DebugFunctionDecl,
426           CapVar ? CapVar->getLocStart() : FD->getLocStart(),
427           CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
428           /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
429     } else {
430       Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
431                                       II, ArgType, ImplicitParamDecl::Other);
432     }
433     Args.emplace_back(Arg);
434     // Do not cast arguments if we emit function with non-original types.
435     TargetArgs.emplace_back(
436         FO.UIntPtrCastRequired
437             ? Arg
438             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
439     ++I;
440   }
441   Args.append(
442       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
443       CD->param_end());
444   TargetArgs.append(
445       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
446       CD->param_end());
447 
448   // Create the function declaration.
449   const CGFunctionInfo &FuncInfo =
450       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
451   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
452 
453   llvm::Function *F =
454       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
455                              FO.FunctionName, &CGM.getModule());
456   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
457   if (CD->isNothrow())
458     F->setDoesNotThrow();
459 
460   // Generate the function.
461   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
462                     FO.S->getLocStart(), CD->getBody()->getLocStart());
463   unsigned Cnt = CD->getContextParamPosition();
464   I = FO.S->captures().begin();
465   for (auto *FD : RD->fields()) {
466     // Do not map arguments if we emit function with non-original types.
467     Address LocalAddr(Address::invalid());
468     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
469       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
470                                                              TargetArgs[Cnt]);
471     } else {
472       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
473     }
474     // If we are capturing a pointer by copy we don't need to do anything, just
475     // use the value that we get from the arguments.
476     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
477       const VarDecl *CurVD = I->getCapturedVar();
478       // If the variable is a reference we need to materialize it here.
479       if (CurVD->getType()->isReferenceType()) {
480         Address RefAddr = CGF.CreateMemTemp(
481             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
482         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
483                               /*Volatile=*/false, CurVD->getType());
484         LocalAddr = RefAddr;
485       }
486       if (!FO.RegisterCastedArgsOnly)
487         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
488       ++Cnt;
489       ++I;
490       continue;
491     }
492 
493     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
494                                         AlignmentSource::Decl);
495     if (FD->hasCapturedVLAType()) {
496       if (FO.UIntPtrCastRequired) {
497         ArgLVal = CGF.MakeAddrLValue(
498             castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
499                                  Args[Cnt]->getName(), ArgLVal),
500             FD->getType(), AlignmentSource::Decl);
501       }
502       auto *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
503       auto VAT = FD->getCapturedVLAType();
504       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
505     } else if (I->capturesVariable()) {
506       auto *Var = I->getCapturedVar();
507       QualType VarTy = Var->getType();
508       Address ArgAddr = ArgLVal.getAddress();
509       if (!VarTy->isReferenceType()) {
510         if (ArgLVal.getType()->isLValueReferenceType()) {
511           ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
512         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
513           assert(ArgLVal.getType()->isPointerType());
514           ArgAddr = CGF.EmitLoadOfPointer(
515               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
516         }
517       }
518       if (!FO.RegisterCastedArgsOnly) {
519         LocalAddrs.insert(
520             {Args[Cnt],
521              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
522       }
523     } else if (I->capturesVariableByCopy()) {
524       assert(!FD->getType()->isAnyPointerType() &&
525              "Not expecting a captured pointer.");
526       auto *Var = I->getCapturedVar();
527       QualType VarTy = Var->getType();
528       LocalAddrs.insert(
529           {Args[Cnt],
530            {Var, FO.UIntPtrCastRequired
531                      ? castValueFromUintptr(CGF, I->getLocation(),
532                                             FD->getType(), Args[Cnt]->getName(),
533                                             ArgLVal, VarTy->isReferenceType())
534                      : ArgLVal.getAddress()}});
535     } else {
536       // If 'this' is captured, load it into CXXThisValue.
537       assert(I->capturesThis());
538       CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
539       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
540     }
541     ++Cnt;
542     ++I;
543   }
544 
545   return F;
546 }
547 
548 llvm::Function *
549 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
550   assert(
551       CapturedStmtInfo &&
552       "CapturedStmtInfo should be set when generating the captured function");
553   const CapturedDecl *CD = S.getCapturedDecl();
554   // Build the argument list.
555   bool NeedWrapperFunction =
556       getDebugInfo() &&
557       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
558   FunctionArgList Args;
559   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
560   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
561   SmallString<256> Buffer;
562   llvm::raw_svector_ostream Out(Buffer);
563   Out << CapturedStmtInfo->getHelperName();
564   if (NeedWrapperFunction)
565     Out << "_debug__";
566   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
567                      Out.str());
568   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
569                                                    VLASizes, CXXThisValue, FO);
570   for (const auto &LocalAddrPair : LocalAddrs) {
571     if (LocalAddrPair.second.first) {
572       setAddrOfLocalVar(LocalAddrPair.second.first,
573                         LocalAddrPair.second.second);
574     }
575   }
576   for (const auto &VLASizePair : VLASizes)
577     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
578   PGO.assignRegionCounters(GlobalDecl(CD), F);
579   CapturedStmtInfo->EmitBody(*this, CD->getBody());
580   FinishFunction(CD->getBodyRBrace());
581   if (!NeedWrapperFunction)
582     return F;
583 
584   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
585                             /*RegisterCastedArgsOnly=*/true,
586                             CapturedStmtInfo->getHelperName());
587   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
588   WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
589   Args.clear();
590   LocalAddrs.clear();
591   VLASizes.clear();
592   llvm::Function *WrapperF =
593       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
594                                    WrapperCGF.CXXThisValue, WrapperFO);
595   llvm::SmallVector<llvm::Value *, 4> CallArgs;
596   for (const auto *Arg : Args) {
597     llvm::Value *CallArg;
598     auto I = LocalAddrs.find(Arg);
599     if (I != LocalAddrs.end()) {
600       LValue LV = WrapperCGF.MakeAddrLValue(
601           I->second.second,
602           I->second.first ? I->second.first->getType() : Arg->getType(),
603           AlignmentSource::Decl);
604       CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
605     } else {
606       auto EI = VLASizes.find(Arg);
607       if (EI != VLASizes.end())
608         CallArg = EI->second.second;
609       else {
610         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
611                                               Arg->getType(),
612                                               AlignmentSource::Decl);
613         CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
614       }
615     }
616     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
617   }
618   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
619                                                   F, CallArgs);
620   WrapperCGF.FinishFunction();
621   return WrapperF;
622 }
623 
624 //===----------------------------------------------------------------------===//
625 //                              OpenMP Directive Emission
626 //===----------------------------------------------------------------------===//
627 void CodeGenFunction::EmitOMPAggregateAssign(
628     Address DestAddr, Address SrcAddr, QualType OriginalType,
629     const llvm::function_ref<void(Address, Address)> &CopyGen) {
630   // Perform element-by-element initialization.
631   QualType ElementTy;
632 
633   // Drill down to the base element type on both arrays.
634   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
635   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
636   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
637 
638   auto SrcBegin = SrcAddr.getPointer();
639   auto DestBegin = DestAddr.getPointer();
640   // Cast from pointer to array type to pointer to single element.
641   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
642   // The basic structure here is a while-do loop.
643   auto BodyBB = createBasicBlock("omp.arraycpy.body");
644   auto DoneBB = createBasicBlock("omp.arraycpy.done");
645   auto IsEmpty =
646       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
647   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
648 
649   // Enter the loop body, making that address the current address.
650   auto EntryBB = Builder.GetInsertBlock();
651   EmitBlock(BodyBB);
652 
653   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
654 
655   llvm::PHINode *SrcElementPHI =
656     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
657   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
658   Address SrcElementCurrent =
659       Address(SrcElementPHI,
660               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
661 
662   llvm::PHINode *DestElementPHI =
663     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
664   DestElementPHI->addIncoming(DestBegin, EntryBB);
665   Address DestElementCurrent =
666     Address(DestElementPHI,
667             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
668 
669   // Emit copy.
670   CopyGen(DestElementCurrent, SrcElementCurrent);
671 
672   // Shift the address forward by one element.
673   auto DestElementNext = Builder.CreateConstGEP1_32(
674       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
675   auto SrcElementNext = Builder.CreateConstGEP1_32(
676       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
677   // Check whether we've reached the end.
678   auto Done =
679       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
680   Builder.CreateCondBr(Done, DoneBB, BodyBB);
681   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
682   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
683 
684   // Done.
685   EmitBlock(DoneBB, /*IsFinished=*/true);
686 }
687 
688 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
689                                   Address SrcAddr, const VarDecl *DestVD,
690                                   const VarDecl *SrcVD, const Expr *Copy) {
691   if (OriginalType->isArrayType()) {
692     auto *BO = dyn_cast<BinaryOperator>(Copy);
693     if (BO && BO->getOpcode() == BO_Assign) {
694       // Perform simple memcpy for simple copying.
695       LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
696       LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
697       EmitAggregateAssign(Dest, Src, OriginalType);
698     } else {
699       // For arrays with complex element types perform element by element
700       // copying.
701       EmitOMPAggregateAssign(
702           DestAddr, SrcAddr, OriginalType,
703           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
704             // Working with the single array element, so have to remap
705             // destination and source variables to corresponding array
706             // elements.
707             CodeGenFunction::OMPPrivateScope Remap(*this);
708             Remap.addPrivate(DestVD, [DestElement]() -> Address {
709               return DestElement;
710             });
711             Remap.addPrivate(
712                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
713             (void)Remap.Privatize();
714             EmitIgnoredExpr(Copy);
715           });
716     }
717   } else {
718     // Remap pseudo source variable to private copy.
719     CodeGenFunction::OMPPrivateScope Remap(*this);
720     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
721     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
722     (void)Remap.Privatize();
723     // Emit copying of the whole variable.
724     EmitIgnoredExpr(Copy);
725   }
726 }
727 
/// Emit the private copies for the variables listed in the 'firstprivate'
/// clauses of \p D and register them in \p PrivateScope.
///
/// \returns true if at least one variable handled here (i.e. not skipped as
/// already captured by value) also appears in a 'lastprivate' clause of \p D;
/// false otherwise, including when there is no insert point.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect the canonical declarations of all lastprivate variables so that
  // firstprivate variables that are also lastprivate can be detected below.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  // Canonical declarations already processed; guards against handling the same
  // variable twice when it is listed in more than one clause.
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // IRef and InitsRef are advanced in lock-step with the private copies.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // No explicit copy is emitted when a copy is not forced, the variable is
      // not also lastprivate, and it is captured through a non-reference
      // field. NOTE(review): presumably the by-value capture already serves as
      // the private copy - confirm against the capture logic.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        // VD is the private copy; VDInit is the helper variable that the
        // private copy's initializer refers to as its source.
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Build a reference to the original variable and take its address
        // before the private mapping for OrigVD is registered.
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal = EmitLValue(&DRE);
        Address OriginalAddr = OriginalLVal.getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(),
                                           Type);
              EmitAggregateAssign(Dest, OriginalLVal, Type);
            } else {
              // Non-trivial constructor: initialize the array element by
              // element, with VDInit temporarily bound to the source element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    // Drop the temporary binding so it can be re-established
                    // for the next element.
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
826 
827 void CodeGenFunction::EmitOMPPrivateClause(
828     const OMPExecutableDirective &D,
829     CodeGenFunction::OMPPrivateScope &PrivateScope) {
830   if (!HaveInsertPoint())
831     return;
832   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
833   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
834     auto IRef = C->varlist_begin();
835     for (auto IInit : C->private_copies()) {
836       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
837       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
838         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
839         bool IsRegistered =
840             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
841               // Emit private VarDecl with copy init.
842               EmitDecl(*VD);
843               return GetAddrOfLocalVar(VD);
844             });
845         assert(IsRegistered && "private var already registered as private");
846         // Silence the warning about unused variable.
847         (void)IsRegistered;
848       }
849       ++IRef;
850     }
851   }
852 }
853 
854 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
855   if (!HaveInsertPoint())
856     return false;
857   // threadprivate_var1 = master_threadprivate_var1;
858   // operator=(threadprivate_var2, master_threadprivate_var2);
859   // ...
860   // __kmpc_barrier(&loc, global_tid);
861   llvm::DenseSet<const VarDecl *> CopiedVars;
862   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
863   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
864     auto IRef = C->varlist_begin();
865     auto ISrcRef = C->source_exprs().begin();
866     auto IDestRef = C->destination_exprs().begin();
867     for (auto *AssignOp : C->assignment_ops()) {
868       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
869       QualType Type = VD->getType();
870       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
871         // Get the address of the master variable. If we are emitting code with
872         // TLS support, the address is passed from the master as field in the
873         // captured declaration.
874         Address MasterAddr = Address::invalid();
875         if (getLangOpts().OpenMPUseTLS &&
876             getContext().getTargetInfo().isTLSSupported()) {
877           assert(CapturedStmtInfo->lookup(VD) &&
878                  "Copyin threadprivates should have been captured!");
879           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
880                           VK_LValue, (*IRef)->getExprLoc());
881           MasterAddr = EmitLValue(&DRE).getAddress();
882           LocalDeclMap.erase(VD);
883         } else {
884           MasterAddr =
885             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
886                                         : CGM.GetAddrOfGlobal(VD),
887                     getContext().getDeclAlign(VD));
888         }
889         // Get the address of the threadprivate variable.
890         Address PrivateAddr = EmitLValue(*IRef).getAddress();
891         if (CopiedVars.size() == 1) {
892           // At first check if current thread is a master thread. If it is, no
893           // need to copy data.
894           CopyBegin = createBasicBlock("copyin.not.master");
895           CopyEnd = createBasicBlock("copyin.not.master.end");
896           Builder.CreateCondBr(
897               Builder.CreateICmpNE(
898                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
899                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
900               CopyBegin, CopyEnd);
901           EmitBlock(CopyBegin);
902         }
903         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
904         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
905         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
906       }
907       ++IRef;
908       ++ISrcRef;
909       ++IDestRef;
910     }
911   }
912   if (CopyEnd) {
913     // Exit out of copying procedure for non-master thread.
914     EmitBlock(CopyEnd, /*IsFinished=*/true);
915     return true;
916   }
917   return false;
918 }
919 
920 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
921     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
922   if (!HaveInsertPoint())
923     return false;
924   bool HasAtLeastOneLastprivate = false;
925   llvm::DenseSet<const VarDecl *> SIMDLCVs;
926   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
927     auto *LoopDirective = cast<OMPLoopDirective>(&D);
928     for (auto *C : LoopDirective->counters()) {
929       SIMDLCVs.insert(
930           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
931     }
932   }
933   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
934   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
935     HasAtLeastOneLastprivate = true;
936     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
937         !getLangOpts().OpenMPSimd)
938       break;
939     auto IRef = C->varlist_begin();
940     auto IDestRef = C->destination_exprs().begin();
941     for (auto *IInit : C->private_copies()) {
942       // Keep the address of the original variable for future update at the end
943       // of the loop.
944       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
945       // Taskloops do not require additional initialization, it is done in
946       // runtime support library.
947       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
948         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
949         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
950           DeclRefExpr DRE(
951               const_cast<VarDecl *>(OrigVD),
952               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
953                   OrigVD) != nullptr,
954               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
955           return EmitLValue(&DRE).getAddress();
956         });
957         // Check if the variable is also a firstprivate: in this case IInit is
958         // not generated. Initialization of this variable will happen in codegen
959         // for 'firstprivate' clause.
960         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
961           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
962           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
963             // Emit private VarDecl with copy init.
964             EmitDecl(*VD);
965             return GetAddrOfLocalVar(VD);
966           });
967           assert(IsRegistered &&
968                  "lastprivate var already registered as private");
969           (void)IsRegistered;
970         }
971       }
972       ++IRef;
973       ++IDestRef;
974     }
975   }
976   return HasAtLeastOneLastprivate;
977 }
978 
979 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
980     const OMPExecutableDirective &D, bool NoFinals,
981     llvm::Value *IsLastIterCond) {
982   if (!HaveInsertPoint())
983     return;
984   // Emit following code:
985   // if (<IsLastIterCond>) {
986   //   orig_var1 = private_orig_var1;
987   //   ...
988   //   orig_varn = private_orig_varn;
989   // }
990   llvm::BasicBlock *ThenBB = nullptr;
991   llvm::BasicBlock *DoneBB = nullptr;
992   if (IsLastIterCond) {
993     ThenBB = createBasicBlock(".omp.lastprivate.then");
994     DoneBB = createBasicBlock(".omp.lastprivate.done");
995     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
996     EmitBlock(ThenBB);
997   }
998   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
999   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1000   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1001     auto IC = LoopDirective->counters().begin();
1002     for (auto F : LoopDirective->finals()) {
1003       auto *D =
1004           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1005       if (NoFinals)
1006         AlreadyEmittedVars.insert(D);
1007       else
1008         LoopCountersAndUpdates[D] = F;
1009       ++IC;
1010     }
1011   }
1012   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1013     auto IRef = C->varlist_begin();
1014     auto ISrcRef = C->source_exprs().begin();
1015     auto IDestRef = C->destination_exprs().begin();
1016     for (auto *AssignOp : C->assignment_ops()) {
1017       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1018       QualType Type = PrivateVD->getType();
1019       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1020       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1021         // If lastprivate variable is a loop control variable for loop-based
1022         // directive, update its value before copyin back to original
1023         // variable.
1024         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1025           EmitIgnoredExpr(FinalExpr);
1026         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1027         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1028         // Get the address of the original variable.
1029         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1030         // Get the address of the private variable.
1031         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1032         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1033           PrivateAddr =
1034               Address(Builder.CreateLoad(PrivateAddr),
1035                       getNaturalTypeAlignment(RefTy->getPointeeType()));
1036         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1037       }
1038       ++IRef;
1039       ++ISrcRef;
1040       ++IDestRef;
1041     }
1042     if (auto *PostUpdate = C->getPostUpdateExpr())
1043       EmitIgnoredExpr(PostUpdate);
1044   }
1045   if (IsLastIterCond)
1046     EmitBlock(DoneBB, /*IsFinished=*/true);
1047 }
1048 
/// Emit the private copies for the items of the 'reduction' clauses of \p D,
/// initialize them, and register in \p PrivateScope both the private copies
/// and the implicit LHS/RHS helper variables used by the reduction
/// operations.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Flatten the per-clause expression lists into parallel vectors, one entry
  // per reduction item across all reduction clauses.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  // Count indexes the current reduction item inside RedCG; ILHS, IRHS and
  // IPriv are advanced in lock-step with it.
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    // Register the (possibly adjusted) private address under the base
    // declaration of the reduction item.
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    // Bind the implicit LHS variable to the shared (original) item and the
    // implicit RHS variable to the private copy. The three branches below
    // differ only in whether the addresses are bitcast to the helper
    // variable's type.
    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      // The private copy is viewed through the RHS helper variable's type.
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      // Note: this redeclares Type with the same value as the outer Type.
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1154 
1155 void CodeGenFunction::EmitOMPReductionClauseFinal(
1156     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1157   if (!HaveInsertPoint())
1158     return;
1159   llvm::SmallVector<const Expr *, 8> Privates;
1160   llvm::SmallVector<const Expr *, 8> LHSExprs;
1161   llvm::SmallVector<const Expr *, 8> RHSExprs;
1162   llvm::SmallVector<const Expr *, 8> ReductionOps;
1163   bool HasAtLeastOneReduction = false;
1164   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1165     HasAtLeastOneReduction = true;
1166     Privates.append(C->privates().begin(), C->privates().end());
1167     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1168     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1169     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1170   }
1171   if (HasAtLeastOneReduction) {
1172     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1173                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1174                       ReductionKind == OMPD_simd;
1175     bool SimpleReduction = ReductionKind == OMPD_simd;
1176     // Emit nowait reduction if nowait clause is present or directive is a
1177     // parallel directive (it always has implicit barrier).
1178     CGM.getOpenMPRuntime().emitReduction(
1179         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1180         {WithNowait, SimpleReduction, ReductionKind});
1181   }
1182 }
1183 
1184 static void emitPostUpdateForReductionClause(
1185     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1186     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1187   if (!CGF.HaveInsertPoint())
1188     return;
1189   llvm::BasicBlock *DoneBB = nullptr;
1190   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1191     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1192       if (!DoneBB) {
1193         if (auto *Cond = CondGen(CGF)) {
1194           // If the first post-update expression is found, emit conditional
1195           // block if it was requested.
1196           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1197           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1198           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1199           CGF.EmitBlock(ThenBB);
1200         }
1201       }
1202       CGF.EmitIgnoredExpr(PostUpdate);
1203     }
1204   }
1205   if (DoneBB)
1206     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1207 }
1208 
1209 namespace {
1210 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1211 /// parallel function. This is necessary for combined constructs such as
1212 /// 'distribute parallel for'
1213 typedef llvm::function_ref<void(CodeGenFunction &,
1214                                 const OMPExecutableDirective &,
1215                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1216     CodeGenBoundParametersTy;
1217 } // anonymous namespace
1218 
1219 static void emitCommonOMPParallelDirective(
1220     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1221     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1222     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1223   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1224   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1225       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1226   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1227     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1228     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1229                                          /*IgnoreResultAssign*/ true);
1230     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1231         CGF, NumThreads, NumThreadsClause->getLocStart());
1232   }
1233   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1234     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1235     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1236         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1237   }
1238   const Expr *IfCond = nullptr;
1239   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1240     if (C->getNameModifier() == OMPD_unknown ||
1241         C->getNameModifier() == OMPD_parallel) {
1242       IfCond = C->getCondition();
1243       break;
1244     }
1245   }
1246 
1247   OMPParallelScope Scope(CGF, S);
1248   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1249   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1250   // lower and upper bounds with the pragma 'for' chunking mechanism.
1251   // The following lambda takes care of appending the lower and upper bound
1252   // parameters when necessary
1253   CodeGenBoundParameters(CGF, S, CapturedVars);
1254   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1255   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1256                                               CapturedVars, IfCond);
1257 }
1258 
/// Bound-parameter callback that appends nothing; passed to
/// emitCommonOMPParallelDirective when no extra bound parameters are needed.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1262 
1263 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1264   // Emit parallel region as a standalone region.
1265   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1266     OMPPrivateScope PrivateScope(CGF);
1267     bool Copyins = CGF.EmitOMPCopyinClause(S);
1268     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1269     if (Copyins) {
1270       // Emit implicit barrier to synchronize threads and avoid data races on
1271       // propagation master's thread values of threadprivate variables to local
1272       // instances of that variables of all other implicit threads.
1273       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1274           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1275           /*ForceSimpleCall=*/true);
1276     }
1277     CGF.EmitOMPPrivateClause(S, PrivateScope);
1278     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1279     (void)PrivateScope.Privatize();
1280     Action.Enter(CGF);
1281     CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1282     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1283   };
1284   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1285                                  emitEmptyBoundParameters);
1286   emitPostUpdateForReductionClause(
1287       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1288 }
1289 
1290 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1291                                       JumpDest LoopExit) {
1292   RunCleanupsScope BodyScope(*this);
1293   // Update counters values on current iteration.
1294   for (auto I : D.updates()) {
1295     EmitIgnoredExpr(I);
1296   }
1297   // Update the linear variables.
1298   // In distribute directives only loop counters may be marked as linear, no
1299   // need to generate the code for them.
1300   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1301     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1302       for (auto *U : C->updates())
1303         EmitIgnoredExpr(U);
1304     }
1305   }
1306 
1307   // On a continue in the body, jump to the end.
1308   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1309   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1310   // Emit loop body.
1311   EmitStmt(D.getBody());
1312   // The end (updates/cleanups).
1313   EmitBlock(Continue.getBlock());
1314   BreakContinueStack.pop_back();
1315 }
1316 
1317 void CodeGenFunction::EmitOMPInnerLoop(
1318     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1319     const Expr *IncExpr,
1320     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1321     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1322   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1323 
1324   // Start the loop with a block that tests the condition.
1325   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1326   EmitBlock(CondBlock);
1327   const SourceRange &R = S.getSourceRange();
1328   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1329                  SourceLocToDebugLoc(R.getEnd()));
1330 
1331   // If there are any cleanups between here and the loop-exit scope,
1332   // create a block to stage a loop exit along.
1333   auto ExitBlock = LoopExit.getBlock();
1334   if (RequiresCleanup)
1335     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1336 
1337   auto LoopBody = createBasicBlock("omp.inner.for.body");
1338 
1339   // Emit condition.
1340   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1341   if (ExitBlock != LoopExit.getBlock()) {
1342     EmitBlock(ExitBlock);
1343     EmitBranchThroughCleanup(LoopExit);
1344   }
1345 
1346   EmitBlock(LoopBody);
1347   incrementProfileCounter(&S);
1348 
1349   // Create a block for the increment.
1350   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1351   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1352 
1353   BodyGen(*this);
1354 
1355   // Emit "IV = IV + 1" and a back-edge to the condition block.
1356   EmitBlock(Continue.getBlock());
1357   EmitIgnoredExpr(IncExpr);
1358   PostIncGen(*this);
1359   BreakContinueStack.pop_back();
1360   EmitBranch(CondBlock);
1361   LoopStack.pop();
1362   // Emit the fall-through block.
1363   EmitBlock(LoopExit.getBlock());
1364 }
1365 
1366 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1367   if (!HaveInsertPoint())
1368     return false;
1369   // Emit inits for the linear variables.
1370   bool HasLinears = false;
1371   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1372     for (auto *Init : C->inits()) {
1373       HasLinears = true;
1374       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1375       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1376         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1377         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1378         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1379                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1380                         VD->getInit()->getType(), VK_LValue,
1381                         VD->getInit()->getExprLoc());
1382         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1383                                                 VD->getType()),
1384                        /*capturedByInit=*/false);
1385         EmitAutoVarCleanups(Emission);
1386       } else
1387         EmitVarDecl(*VD);
1388     }
1389     // Emit the linear steps for the linear clauses.
1390     // If a step is not constant, it is pre-calculated before the loop.
1391     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1392       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1393         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1394         // Emit calculation of the linear step.
1395         EmitIgnoredExpr(CS);
1396       }
1397   }
1398   return HasLinears;
1399 }
1400 
1401 void CodeGenFunction::EmitOMPLinearClauseFinal(
1402     const OMPLoopDirective &D,
1403     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1404   if (!HaveInsertPoint())
1405     return;
1406   llvm::BasicBlock *DoneBB = nullptr;
1407   // Emit the final values of the linear variables.
1408   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1409     auto IC = C->varlist_begin();
1410     for (auto *F : C->finals()) {
1411       if (!DoneBB) {
1412         if (auto *Cond = CondGen(*this)) {
1413           // If the first post-update expression is found, emit conditional
1414           // block if it was requested.
1415           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1416           DoneBB = createBasicBlock(".omp.linear.pu.done");
1417           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1418           EmitBlock(ThenBB);
1419         }
1420       }
1421       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1422       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1423                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1424                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1425       Address OrigAddr = EmitLValue(&DRE).getAddress();
1426       CodeGenFunction::OMPPrivateScope VarScope(*this);
1427       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1428       (void)VarScope.Privatize();
1429       EmitIgnoredExpr(F);
1430       ++IC;
1431     }
1432     if (auto *PostUpdate = C->getPostUpdateExpr())
1433       EmitIgnoredExpr(PostUpdate);
1434   }
1435   if (DoneBB)
1436     EmitBlock(DoneBB, /*IsFinished=*/true);
1437 }
1438 
1439 static void emitAlignedClause(CodeGenFunction &CGF,
1440                               const OMPExecutableDirective &D) {
1441   if (!CGF.HaveInsertPoint())
1442     return;
1443   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1444     unsigned ClauseAlignment = 0;
1445     if (auto AlignmentExpr = Clause->getAlignment()) {
1446       auto AlignmentCI =
1447           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1448       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1449     }
1450     for (auto E : Clause->varlists()) {
1451       unsigned Alignment = ClauseAlignment;
1452       if (Alignment == 0) {
1453         // OpenMP [2.8.1, Description]
1454         // If no optional parameter is specified, implementation-defined default
1455         // alignments for SIMD instructions on the target platforms are assumed.
1456         Alignment =
1457             CGF.getContext()
1458                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1459                     E->getType()->getPointeeType()))
1460                 .getQuantity();
1461       }
1462       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1463              "alignment is not power of 2");
1464       if (Alignment != 0) {
1465         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1466         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1467       }
1468     }
1469   }
1470 }
1471 
1472 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1473     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1474   if (!HaveInsertPoint())
1475     return;
1476   auto I = S.private_counters().begin();
1477   for (auto *E : S.counters()) {
1478     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1479     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1480     // Emit var without initialization.
1481     auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1482     EmitAutoVarCleanups(VarEmission);
1483     LocalDeclMap.erase(PrivateVD);
1484     (void)LoopScope.addPrivate(VD, [&VarEmission]() {
1485       return VarEmission.getAllocatedAddress();
1486     });
1487     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1488         VD->hasGlobalStorage()) {
1489       (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
1490         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1491                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1492                         E->getType(), VK_LValue, E->getExprLoc());
1493         return EmitLValue(&DRE).getAddress();
1494       });
1495     } else {
1496       (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
1497         return VarEmission.getAllocatedAddress();
1498       });
1499     }
1500     ++I;
1501   }
1502 }
1503 
1504 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1505                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1506                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1507   if (!CGF.HaveInsertPoint())
1508     return;
1509   {
1510     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1511     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1512     (void)PreCondScope.Privatize();
1513     // Get initial values of real counters.
1514     for (auto I : S.inits()) {
1515       CGF.EmitIgnoredExpr(I);
1516     }
1517   }
1518   // Check that loop is executed at least one time.
1519   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1520 }
1521 
1522 void CodeGenFunction::EmitOMPLinearClause(
1523     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1524   if (!HaveInsertPoint())
1525     return;
1526   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1527   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1528     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1529     for (auto *C : LoopDirective->counters()) {
1530       SIMDLCVs.insert(
1531           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1532     }
1533   }
1534   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1535     auto CurPrivate = C->privates().begin();
1536     for (auto *E : C->varlists()) {
1537       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1538       auto *PrivateVD =
1539           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1540       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1541         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1542           // Emit private VarDecl with copy init.
1543           EmitVarDecl(*PrivateVD);
1544           return GetAddrOfLocalVar(PrivateVD);
1545         });
1546         assert(IsRegistered && "linear var already registered as private");
1547         // Silence the warning about unused variable.
1548         (void)IsRegistered;
1549       } else
1550         EmitVarDecl(*PrivateVD);
1551       ++CurPrivate;
1552     }
1553   }
1554 }
1555 
1556 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1557                                      const OMPExecutableDirective &D,
1558                                      bool IsMonotonic) {
1559   if (!CGF.HaveInsertPoint())
1560     return;
1561   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1562     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1563                                  /*ignoreResult=*/true);
1564     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1565     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1566     // In presence of finite 'safelen', it may be unsafe to mark all
1567     // the memory instructions parallel, because loop-carried
1568     // dependences of 'safelen' iterations are possible.
1569     if (!IsMonotonic)
1570       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1571   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1572     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1573                                  /*ignoreResult=*/true);
1574     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1575     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1576     // In presence of finite 'safelen', it may be unsafe to mark all
1577     // the memory instructions parallel, because loop-carried
1578     // dependences of 'safelen' iterations are possible.
1579     CGF.LoopStack.setParallel(false);
1580   }
1581 }
1582 
1583 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1584                                       bool IsMonotonic) {
1585   // Walk clauses and process safelen/lastprivate.
1586   LoopStack.setParallel(!IsMonotonic);
1587   LoopStack.setVectorizeEnable(true);
1588   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1589 }
1590 
1591 void CodeGenFunction::EmitOMPSimdFinal(
1592     const OMPLoopDirective &D,
1593     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1594   if (!HaveInsertPoint())
1595     return;
1596   llvm::BasicBlock *DoneBB = nullptr;
1597   auto IC = D.counters().begin();
1598   auto IPC = D.private_counters().begin();
1599   for (auto F : D.finals()) {
1600     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1601     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1602     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1603     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1604         OrigVD->hasGlobalStorage() || CED) {
1605       if (!DoneBB) {
1606         if (auto *Cond = CondGen(*this)) {
1607           // If the first post-update expression is found, emit conditional
1608           // block if it was requested.
1609           auto *ThenBB = createBasicBlock(".omp.final.then");
1610           DoneBB = createBasicBlock(".omp.final.done");
1611           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1612           EmitBlock(ThenBB);
1613         }
1614       }
1615       Address OrigAddr = Address::invalid();
1616       if (CED) {
1617         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1618       } else {
1619         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1620                         /*RefersToEnclosingVariableOrCapture=*/false,
1621                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1622         OrigAddr = EmitLValue(&DRE).getAddress();
1623       }
1624       OMPPrivateScope VarScope(*this);
1625       VarScope.addPrivate(OrigVD,
1626                           [OrigAddr]() -> Address { return OrigAddr; });
1627       (void)VarScope.Privatize();
1628       EmitIgnoredExpr(F);
1629     }
1630     ++IC;
1631     ++IPC;
1632   }
1633   if (DoneBB)
1634     EmitBlock(DoneBB, /*IsFinished=*/true);
1635 }
1636 
/// Emit the body of loop directive \a S, passing \a LoopExit through to the
/// body emission, and then emit a stop point for \a S.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
1643 
1644 /// Emit a helper variable and return corresponding lvalue.
1645 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1646                                const DeclRefExpr *Helper) {
1647   auto VDecl = cast<VarDecl>(Helper->getDecl());
1648   CGF.EmitVarDecl(*VDecl);
1649   return CGF.EmitLValue(Helper);
1650 }
1651 
/// Emit the code for the region of simd directive \a S.
///
/// The emitted code is guarded by the directive's precondition (skipped
/// entirely when it folds to false) and consists of the iteration-variable
/// setup, clause initialization, the inner loop, and the final updates for
/// counters, lastprivate, reduction and linear variables.
///
/// \param Action Pre/post action; its Enter hook is invoked first.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  // For distribute, worksharing and taskloop based directives the lower and
  // upper bound helper variables are emitted as well; the resulting lvalues
  // are not used here.
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  // Stays null when the precondition folds to true, in which case no
  // conditional structure is emitted at all.
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  // Set the loop attributes (vectorization, parallel marking, widths).
  CGF.EmitOMPSimdInit(S);

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    // Everything privatized for the loop body lives in this scope; the
    // linear finals below are emitted after it is closed.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // The body is emitted with a default-constructed JumpDest as loop exit,
    // and nothing extra is emitted after the increment.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // The condition generators below return null, i.e. the final updates are
    // emitted without an additional guard.
    CGF.EmitOMPSimdFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(
        CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(
      S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
1738 
1739 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1740   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1741     emitOMPSimdRegion(CGF, S, Action);
1742   };
1743   OMPLexicalScope Scope(*this, S, OMPD_unknown);
1744   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1745 }
1746 
/// Emit the outer loop shared by worksharing and distribute code generation:
/// a dispatch loop whose every iteration runs an inner loop over one chunk.
///
/// \param DynamicOrOrdered When true, the dispatch condition is the result of
/// the runtime's "next chunk" call and the IV initialization is emitted at the
/// top of the dispatch body. When false, the EUB, Init and Cond expressions
/// from \a LoopArgs form the dispatch condition, NextLB / NextUB are emitted
/// after each inner loop, and the runtime's static-finish call is emitted on
/// exit.
/// \param IsMonotonic Controls the parallel marking of the inner loop, either
/// directly or through EmitOMPSimdInit for simd directives.
/// \param S The loop directive being emitted.
/// \param LoopScope Private scope of the loop; queried for pending cleanups.
/// \param LoopArgs Bound, stride and helper expressions used by the loop.
/// \param CodeGenLoop Generates the body of the inner loop.
/// \param CodeGenOrdered Generates code placed after the inner loop's
/// increment.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  // Size and signedness of the iteration variable are passed to the runtime
  // and to the ordered code generator.
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // The runtime call's result serves as the dispatch condition.
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  // Back-edge to the dispatch condition.
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
1851 
1852 void CodeGenFunction::EmitOMPForOuterLoop(
1853     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1854     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1855     const OMPLoopArguments &LoopArgs,
1856     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1857   auto &RT = CGM.getOpenMPRuntime();
1858 
1859   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1860   const bool DynamicOrOrdered =
1861       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1862 
1863   assert((Ordered ||
1864           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1865                                  LoopArgs.Chunk != nullptr)) &&
1866          "static non-chunked schedule does not need outer loop");
1867 
1868   // Emit outer loop.
1869   //
1870   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1871   // When schedule(dynamic,chunk_size) is specified, the iterations are
1872   // distributed to threads in the team in chunks as the threads request them.
1873   // Each thread executes a chunk of iterations, then requests another chunk,
1874   // until no chunks remain to be distributed. Each chunk contains chunk_size
1875   // iterations, except for the last chunk to be distributed, which may have
1876   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1877   //
1878   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1879   // to threads in the team in chunks as the executing threads request them.
1880   // Each thread executes a chunk of iterations, then requests another chunk,
1881   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1882   // each chunk is proportional to the number of unassigned iterations divided
1883   // by the number of threads in the team, decreasing to 1. For a chunk_size
1884   // with value k (greater than 1), the size of each chunk is determined in the
1885   // same way, with the restriction that the chunks do not contain fewer than k
1886   // iterations (except for the last chunk to be assigned, which may have fewer
1887   // than k iterations).
1888   //
1889   // When schedule(auto) is specified, the decision regarding scheduling is
1890   // delegated to the compiler and/or runtime system. The programmer gives the
1891   // implementation the freedom to choose any possible mapping of iterations to
1892   // threads in the team.
1893   //
1894   // When schedule(runtime) is specified, the decision regarding scheduling is
1895   // deferred until run time, and the schedule and chunk size are taken from the
1896   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1897   // implementation defined
1898   //
1899   // while(__kmpc_dispatch_next(&LB, &UB)) {
1900   //   idx = LB;
1901   //   while (idx <= UB) { BODY; ++idx;
1902   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1903   //   } // inner loop
1904   // }
1905   //
1906   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1907   // When schedule(static, chunk_size) is specified, iterations are divided into
1908   // chunks of size chunk_size, and the chunks are assigned to the threads in
1909   // the team in a round-robin fashion in the order of the thread number.
1910   //
1911   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1912   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1913   //   LB = LB + ST;
1914   //   UB = UB + ST;
1915   // }
1916   //
1917 
1918   const Expr *IVExpr = S.getIterationVariable();
1919   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1920   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1921 
1922   if (DynamicOrOrdered) {
1923     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1924     llvm::Value *LBVal = DispatchBounds.first;
1925     llvm::Value *UBVal = DispatchBounds.second;
1926     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1927                                                              LoopArgs.Chunk};
1928     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1929                            IVSigned, Ordered, DipatchRTInputValues);
1930   } else {
1931     CGOpenMPRuntime::StaticRTInput StaticInit(
1932         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1933         LoopArgs.ST, LoopArgs.Chunk);
1934     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1935                          ScheduleKind, StaticInit);
1936   }
1937 
1938   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1939                                     const unsigned IVSize,
1940                                     const bool IVSigned) {
1941     if (Ordered) {
1942       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1943                                                             IVSigned);
1944     }
1945   };
1946 
1947   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1948                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1949   OuterLoopArgs.IncExpr = S.getInc();
1950   OuterLoopArgs.Init = S.getInit();
1951   OuterLoopArgs.Cond = S.getCond();
1952   OuterLoopArgs.NextLB = S.getNextLowerBound();
1953   OuterLoopArgs.NextUB = S.getNextUpperBound();
1954   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1955                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1956 }
1957 
1958 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1959                              const unsigned IVSize, const bool IVSigned) {}
1960 
1961 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1962     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1963     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1964     const CodeGenLoopTy &CodeGenLoopContent) {
1965 
1966   auto &RT = CGM.getOpenMPRuntime();
1967 
1968   // Emit outer loop.
1969   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1970   // dynamic
1971   //
1972 
1973   const Expr *IVExpr = S.getIterationVariable();
1974   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1975   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1976 
1977   CGOpenMPRuntime::StaticRTInput StaticInit(
1978       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1979       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1980   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1981 
1982   // for combined 'distribute' and 'for' the increment expression of distribute
1983   // is store in DistInc. For 'distribute' alone, it is in Inc.
1984   Expr *IncExpr;
1985   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1986     IncExpr = S.getDistInc();
1987   else
1988     IncExpr = S.getInc();
1989 
1990   // this routine is shared by 'omp distribute parallel for' and
1991   // 'omp distribute': select the right EUB expression depending on the
1992   // directive
1993   OMPLoopArguments OuterLoopArgs;
1994   OuterLoopArgs.LB = LoopArgs.LB;
1995   OuterLoopArgs.UB = LoopArgs.UB;
1996   OuterLoopArgs.ST = LoopArgs.ST;
1997   OuterLoopArgs.IL = LoopArgs.IL;
1998   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1999   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2000                           ? S.getCombinedEnsureUpperBound()
2001                           : S.getEnsureUpperBound();
2002   OuterLoopArgs.IncExpr = IncExpr;
2003   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2004                            ? S.getCombinedInit()
2005                            : S.getInit();
2006   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2007                            ? S.getCombinedCond()
2008                            : S.getCond();
2009   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2010                              ? S.getCombinedNextLowerBound()
2011                              : S.getNextLowerBound();
2012   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2013                              ? S.getCombinedNextUpperBound()
2014                              : S.getNextUpperBound();
2015 
2016   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2017                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2018                    emitEmptyOrdered);
2019 }
2020 
2021 static std::pair<LValue, LValue>
2022 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2023                                      const OMPExecutableDirective &S) {
2024   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2025   LValue LB =
2026       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2027   LValue UB =
2028       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2029 
2030   // When composing 'distribute' with 'for' (e.g. as in 'distribute
2031   // parallel for') we need to use the 'distribute'
2032   // chunk lower and upper bounds rather than the whole loop iteration
2033   // space. These are parameters to the outlined function for 'parallel'
2034   // and we copy the bounds of the previous schedule into the
2035   // the current ones.
2036   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2037   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2038   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2039       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2040   PrevLBVal = CGF.EmitScalarConversion(
2041       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2042       LS.getIterationVariable()->getType(),
2043       LS.getPrevLowerBoundVariable()->getExprLoc());
2044   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2045       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2046   PrevUBVal = CGF.EmitScalarConversion(
2047       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2048       LS.getIterationVariable()->getType(),
2049       LS.getPrevUpperBoundVariable()->getExprLoc());
2050 
2051   CGF.EmitStoreOfScalar(PrevLBVal, LB);
2052   CGF.EmitStoreOfScalar(PrevUBVal, UB);
2053 
2054   return {LB, UB};
2055 }
2056 
2057 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2058 /// we need to use the LB and UB expressions generated by the worksharing
2059 /// code generation support, whereas in non combined situations we would
2060 /// just emit 0 and the LastIteration expression
2061 /// This function is necessary due to the difference of the LB and UB
2062 /// types for the RT emission routines for 'for_static_init' and
2063 /// 'for_dispatch_init'
2064 static std::pair<llvm::Value *, llvm::Value *>
2065 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2066                                         const OMPExecutableDirective &S,
2067                                         Address LB, Address UB) {
2068   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2069   const Expr *IVExpr = LS.getIterationVariable();
2070   // when implementing a dynamic schedule for a 'for' combined with a
2071   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2072   // is not normalized as each team only executes its own assigned
2073   // distribute chunk
2074   QualType IteratorTy = IVExpr->getType();
2075   llvm::Value *LBVal =
2076       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2077   llvm::Value *UBVal =
2078       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2079   return {LBVal, UBVal};
2080 }
2081 
2082 static void emitDistributeParallelForDistributeInnerBoundParams(
2083     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2084     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2085   const auto &Dir = cast<OMPLoopDirective>(S);
2086   LValue LB =
2087       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2088   auto LBCast = CGF.Builder.CreateIntCast(
2089       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2090   CapturedVars.push_back(LBCast);
2091   LValue UB =
2092       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2093 
2094   auto UBCast = CGF.Builder.CreateIntCast(
2095       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2096   CapturedVars.push_back(UBCast);
2097 }
2098 
2099 static void
2100 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2101                                  const OMPLoopDirective &S,
2102                                  CodeGenFunction::JumpDest LoopExit) {
2103   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2104                                          PrePostActionTy &Action) {
2105     Action.Enter(CGF);
2106     bool HasCancel = false;
2107     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2108       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2109         HasCancel = D->hasCancel();
2110       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2111         HasCancel = D->hasCancel();
2112       else if (const auto *D =
2113                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2114         HasCancel = D->hasCancel();
2115     }
2116     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2117                                                      HasCancel);
2118     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2119                                emitDistributeParallelForInnerBounds,
2120                                emitDistributeParallelForDispatchBounds);
2121   };
2122 
2123   emitCommonOMPParallelDirective(
2124       CGF, S,
2125       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2126       CGInlinedWorksharingLoop,
2127       emitDistributeParallelForDistributeInnerBoundParams);
2128 }
2129 
2130 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2131     const OMPDistributeParallelForDirective &S) {
2132   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2133     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2134                               S.getDistInc());
2135   };
2136   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2137   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2138 }
2139 
2140 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2141     const OMPDistributeParallelForSimdDirective &S) {
2142   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2143     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2144                               S.getDistInc());
2145   };
2146   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2147   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2148 }
2149 
2150 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2151     const OMPDistributeSimdDirective &S) {
2152   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2153     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2154   };
2155   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2156   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2157 }
2158 
2159 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2160     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2161   // Emit SPMD target parallel for region as a standalone region.
2162   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2163     emitOMPSimdRegion(CGF, S, Action);
2164   };
2165   llvm::Function *Fn;
2166   llvm::Constant *Addr;
2167   // Emit target region as a standalone region.
2168   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2169       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2170   assert(Fn && Addr && "Target device function emission failed.");
2171 }
2172 
2173 void CodeGenFunction::EmitOMPTargetSimdDirective(
2174     const OMPTargetSimdDirective &S) {
2175   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2176     emitOMPSimdRegion(CGF, S, Action);
2177   };
2178   emitCommonOMPTargetDirective(*this, S, CodeGen);
2179 }
2180 
2181 namespace {
2182   struct ScheduleKindModifiersTy {
2183     OpenMPScheduleClauseKind Kind;
2184     OpenMPScheduleClauseModifier M1;
2185     OpenMPScheduleClauseModifier M2;
2186     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2187                             OpenMPScheduleClauseModifier M1,
2188                             OpenMPScheduleClauseModifier M2)
2189         : Kind(Kind), M1(M1), M2(M2) {}
2190   };
2191 } // namespace
2192 
/// Emit a worksharing loop for the loop directive \p S.
///
/// The emission proceeds in this order: the iteration variable and (when it
/// is a variable) the iteration count are emitted, the loop precondition is
/// checked, helper variables and clause initializers are emitted, and then
/// either a statically scheduled non-chunked loop is emitted inline or the
/// work is delegated to EmitOMPForOuterLoop. Afterwards the simd, reduction,
/// lastprivate and linear finalizers are emitted.
///
/// \param S                 Loop directive to emit.
/// \param EUB               Expression stored in the OMPLoopArguments passed
///                          to EmitOMPForOuterLoop; not used on the static
///                          non-chunked path.
/// \param CodeGenLoopBounds Callback that yields the lower/upper bound
///                          lvalues for the loop.
/// \param CGDispatchBounds  Callback forwarded to EmitOMPForOuterLoop.
/// \returns false when the precondition constant-folds to false (nothing else
/// is emitted after the iteration variables); otherwise the result of
/// EmitOMPLastprivateClauseInit for \p S.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Assigned by EmitOMPLastprivateClauseInit below; the only exit taken
  // before that assignment returns a literal false instead of this variable.
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to true, in which case no
    // continuation block is needed at the end.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    // An 'ordered' clause with a loop count triggers doacross initialization;
    // an 'ordered' clause without one marks the loop as ordered.
    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    // NOTE(review): EmittedFinals is not referenced anywhere else in this
    // function.
    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    // Lower/upper bounds come from the caller-supplied callback; stride and
    // is-last-iteration flag are the directive's own helper variables.
    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      // Chunk stays null when there is no schedule clause or the clause has
      // no chunk size; otherwise it is converted to the iteration variable
      // type.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        // Static non-chunked, non-ordered schedule: the loop is emitted
        // inline here, bracketed by the runtime's static init/finish calls.
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress());
        RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Every other schedule (or an ordered loop) goes through the outer
        // loop emission.
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      // Emit simd finalization guarded by IsLastIter != 0.
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    // Linear clause finalization happens after the private scope is closed,
    // again guarded by IsLastIter != 0.
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}
2380 
2381 /// The following two functions generate expressions for the loop lower
2382 /// and upper bounds in case of static and dynamic (dispatch) schedule
2383 /// of the associated 'for' or 'distribute' loop.
2384 static std::pair<LValue, LValue>
2385 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2386   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2387   LValue LB =
2388       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2389   LValue UB =
2390       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2391   return {LB, UB};
2392 }
2393 
2394 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2395 /// consider the lower and upper bound expressions generated by the
2396 /// worksharing loop support, but we use 0 and the iteration space size as
2397 /// constants
2398 static std::pair<llvm::Value *, llvm::Value *>
2399 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2400                           Address LB, Address UB) {
2401   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2402   const Expr *IVExpr = LS.getIterationVariable();
2403   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2404   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2405   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2406   return {LBVal, UBVal};
2407 }
2408 
2409 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2410   bool HasLastprivates = false;
2411   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2412                                           PrePostActionTy &) {
2413     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2414     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2415                                                  emitForLoopBounds,
2416                                                  emitDispatchForLoopBounds);
2417   };
2418   {
2419     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2420     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2421                                                 S.hasCancel());
2422   }
2423 
2424   // Emit an implicit barrier at the end.
2425   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2426     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2427   }
2428 }
2429 
2430 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2431   bool HasLastprivates = false;
2432   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2433                                           PrePostActionTy &) {
2434     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2435                                                  emitForLoopBounds,
2436                                                  emitDispatchForLoopBounds);
2437   };
2438   {
2439     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2440     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2441   }
2442 
2443   // Emit an implicit barrier at the end.
2444   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2445     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2446   }
2447 }
2448 
2449 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2450                                 const Twine &Name,
2451                                 llvm::Value *Init = nullptr) {
2452   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2453   if (Init)
2454     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2455   return LVal;
2456 }
2457 
/// Emit the body of a sections-style directive \p S.
///
/// The sections are emitted as a statically scheduled, non-chunked loop over
/// a 32-bit signed index whose body is a switch that dispatches to one
/// section per case. When the captured statement is a CompoundStmt each child
/// becomes one case; otherwise the whole statement is emitted as case 0.
/// Firstprivate, private, lastprivate and reduction clauses of \p S are
/// handled around the loop. The cancel flag is taken from \p S when it is an
/// OMPSectionsDirective or an OMPParallelSectionsDirective.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *Stmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // Null when the captured statement is not a compound statement.
  const auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Index of the last section, or 0 for a single non-compound statement.
    // NOTE(review): for a compound statement with no children CS->size() - 1
    // would wrap — confirm whether that case can reach this point.
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions bound to IV and UB so that the condition and
    // increment below can be built as ordinary AST operators on the stack.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart(), true);
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      // The exit block doubles as the switch's default destination.
      auto *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single non-compound statement: emit it as the only case.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' directives are asked for a
  // cancel flag; any other directive kind is treated as having none.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2597 
2598 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2599   {
2600     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2601     EmitSections(S);
2602   }
2603   // Emit an implicit barrier at the end.
2604   if (!S.getSingleClause<OMPNowaitClause>()) {
2605     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2606                                            OMPD_sections);
2607   }
2608 }
2609 
2610 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2611   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2612     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2613   };
2614   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2615   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2616                                               S.hasCancel());
2617 }
2618 
2619 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2620   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2621   llvm::SmallVector<const Expr *, 8> DestExprs;
2622   llvm::SmallVector<const Expr *, 8> SrcExprs;
2623   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2624   // Check if there are any 'copyprivate' clauses associated with this
2625   // 'single' construct.
2626   // Build a list of copyprivate variables along with helper expressions
2627   // (<source>, <destination>, <destination>=<source> expressions)
2628   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2629     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2630     DestExprs.append(C->destination_exprs().begin(),
2631                      C->destination_exprs().end());
2632     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2633     AssignmentOps.append(C->assignment_ops().begin(),
2634                          C->assignment_ops().end());
2635   }
2636   // Emit code for 'single' region along with 'copyprivate' clauses
2637   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2638     Action.Enter(CGF);
2639     OMPPrivateScope SingleScope(CGF);
2640     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2641     CGF.EmitOMPPrivateClause(S, SingleScope);
2642     (void)SingleScope.Privatize();
2643     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2644   };
2645   {
2646     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2647     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2648                                             CopyprivateVars, DestExprs,
2649                                             SrcExprs, AssignmentOps);
2650   }
2651   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2652   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2653   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2654     CGM.getOpenMPRuntime().emitBarrierCall(
2655         *this, S.getLocStart(),
2656         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2657   }
2658 }
2659 
2660 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2661   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2662     Action.Enter(CGF);
2663     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2664   };
2665   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2666   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2667 }
2668 
2669 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2670   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2671     Action.Enter(CGF);
2672     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2673   };
2674   Expr *Hint = nullptr;
2675   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2676     Hint = HintClause->getHint();
2677   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2678   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2679                                             S.getDirectiveName().getAsString(),
2680                                             CodeGen, S.getLocStart(), Hint);
2681 }
2682 
2683 void CodeGenFunction::EmitOMPParallelForDirective(
2684     const OMPParallelForDirective &S) {
2685   // Emit directive as a combined directive that consists of two implicit
2686   // directives: 'parallel' with 'for' directive.
2687   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2688     Action.Enter(CGF);
2689     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2690     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2691                                emitDispatchForLoopBounds);
2692   };
2693   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2694                                  emitEmptyBoundParameters);
2695 }
2696 
2697 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2698     const OMPParallelForSimdDirective &S) {
2699   // Emit directive as a combined directive that consists of two implicit
2700   // directives: 'parallel' with 'for' directive.
2701   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2702     Action.Enter(CGF);
2703     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2704                                emitDispatchForLoopBounds);
2705   };
2706   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2707                                  emitEmptyBoundParameters);
2708 }
2709 
/// Emits code for a combined '#pragma omp parallel sections' directive.
///
/// The region body calls EmitSections for \p S. The region is handed to
/// emitCommonOMPParallelDirective together with OMPD_sections and
/// emitEmptyBoundParameters.
///
/// \param S The 'parallel sections' directive to emit.
2710 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2711     const OMPParallelSectionsDirective &S) {
2712   // Emit directive as a combined directive that consists of two implicit
2713   // directives: 'parallel' with 'sections' directive.
2714   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2715     Action.Enter(CGF);
2716     CGF.EmitSections(S);
2717   };
2718   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2719                                  emitEmptyBoundParameters);
2720 }
2721 
/// Emits the shared part of a task-based OpenMP directive.
///
/// Collects clause information from \p S into \p Data, builds the outlined
/// task function via emitTaskOutlinedFunction (its body privatizes variables
/// and then runs \p BodyGen), and finally invokes \p TaskGen with the outlined
/// function so that the caller can emit the actual task call.
///
/// \param S              Directive being emitted.
/// \param CapturedRegion Directive kind used to select the captured statement
///                       of \p S that holds the task body.
/// \param BodyGen        Emits the task body inside the outlined function,
///                       after privatization and Action.Enter.
/// \param TaskGen        Called as TaskGen(*this, OutlinedFn, Data) while an
///                       OMPLexicalScope for \p S is alive.
/// \param Data           In/out. Filled here: Final, Priority, PrivateVars /
///                       PrivateCopies, Firstprivate*, Lastprivate*,
///                       Reduction*, Reductions and Dependences. Data.Tied and
///                       Data.NumberOfParts are forwarded to
///                       emitTaskOutlinedFunction.
2722 void CodeGenFunction::EmitOMPTaskBasedDirective(
2723     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
2724     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
2725     OMPTaskDataTy &Data) {
2726   // Emit outlined function for task construct.
2727   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
       // Parameters of the captured declaration are addressed by position: I is
       // the first one, PartId the second one and TaskT the fifth one (index 4).
2728   auto *I = CS->getCapturedDecl()->param_begin();
2729   auto *PartId = std::next(I);
2730   auto *TaskT = std::next(I, 4);
2731   // Check if the task is final
2732   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2733     // If the condition constant folds and can be elided, try to avoid emitting
2734     // the condition and the dead arm of the if/else.
2735     auto *Cond = Clause->getCondition();
2736     bool CondConstant;
2737     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2738       Data.Final.setInt(CondConstant);
2739     else
2740       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2741   } else {
2742     // By default the task is not final.
2743     Data.Final.setInt(/*IntVal=*/false);
2744   }
2745   // Check if the task has 'priority' clause.
2746   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2747     auto *Prio = Clause->getPriority();
2748     Data.Priority.setInt(/*IntVal=*/true);
         // The priority value is converted to a signed 32-bit integer type.
2749     Data.Priority.setPointer(EmitScalarConversion(
2750         EmitScalarExpr(Prio), Prio->getType(),
2751         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2752         Prio->getExprLoc()));
2753   }
2754   // The first function argument for tasks is a thread id, the second one is a
2755   // part id (0 for tied tasks, >=0 for untied task).
       // Canonical declarations already recorded, used to skip duplicates.
2756   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2757   // Get list of private variables.
2758   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2759     auto IRef = C->varlist_begin();
2760     for (auto *IInit : C->private_copies()) {
2761       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2762       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2763         Data.PrivateVars.push_back(*IRef);
2764         Data.PrivateCopies.push_back(IInit);
2765       }
2766       ++IRef;
2767     }
2768   }
       // The set is reset after the 'private' clauses only; it is shared (not
       // reset) between the firstprivate and lastprivate loops below.
2769   EmittedAsPrivate.clear();
2770   // Get list of firstprivate variables.
2771   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2772     auto IRef = C->varlist_begin();
2773     auto IElemInitRef = C->inits().begin();
2774     for (auto *IInit : C->private_copies()) {
2775       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2776       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2777         Data.FirstprivateVars.push_back(*IRef);
2778         Data.FirstprivateCopies.push_back(IInit);
2779         Data.FirstprivateInits.push_back(*IElemInitRef);
2780       }
2781       ++IRef;
2782       ++IElemInitRef;
2783     }
2784   }
2785   // Get list of lastprivate variables (for taskloops).
       // Maps the declaration behind each destination expression to the
       // reference to the original variable.
2786   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2787   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2788     auto IRef = C->varlist_begin();
2789     auto ID = C->destination_exprs().begin();
2790     for (auto *IInit : C->private_copies()) {
2791       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2792       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2793         Data.LastprivateVars.push_back(*IRef);
2794         Data.LastprivateCopies.push_back(IInit);
2795       }
           // The destination mapping is recorded for every list item, including
           // items that were skipped above as already emitted.
2796       LastprivateDstsOrigs.insert(
2797           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2798            cast<DeclRefExpr>(*IRef)});
2799       ++IRef;
2800       ++ID;
2801     }
2802   }
       // Collect the 'reduction' clause items; the per-item iterators are
       // advanced in lock-step with the variable list.
2803   SmallVector<const Expr *, 4> LHSs;
2804   SmallVector<const Expr *, 4> RHSs;
2805   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2806     auto IPriv = C->privates().begin();
2807     auto IRed = C->reduction_ops().begin();
2808     auto ILHS = C->lhs_exprs().begin();
2809     auto IRHS = C->rhs_exprs().begin();
2810     for (const auto *Ref : C->varlists()) {
2811       Data.ReductionVars.emplace_back(Ref);
2812       Data.ReductionCopies.emplace_back(*IPriv);
2813       Data.ReductionOps.emplace_back(*IRed);
2814       LHSs.emplace_back(*ILHS);
2815       RHSs.emplace_back(*IRHS);
2816       std::advance(IPriv, 1);
2817       std::advance(IRed, 1);
2818       std::advance(ILHS, 1);
2819       std::advance(IRHS, 1);
2820     }
2821   }
2822   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2823       *this, S.getLocStart(), LHSs, RHSs, Data);
2824   // Build list of dependences.
2825   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2826     for (auto *IRef : C->varlists())
2827       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
       // Body generator for the outlined task function: remaps private copies,
       // reduction items and in_reduction items, then emits the task body.
2828   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
2829                     CapturedRegion](CodeGenFunction &CGF,
2830                                     PrePostActionTy &Action) {
2831     // Set proper addresses for generated private copies.
2832     OMPPrivateScope Scope(CGF);
2833     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2834         !Data.LastprivateVars.empty()) {
           // NOTE(review): PrivatesParam and CopyFnParam are declared but the
           // literal indices 2 and 3 are used below; consider using the enum
           // constants.
2835       enum { PrivatesParam = 2, CopyFnParam = 3 };
2836       auto *CopyFn = CGF.Builder.CreateLoad(
2837           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2838       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2839           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2840       // Map privates.
           // CallArgs starts with the privates pointer, followed by one
           // temporary pointer slot per private, firstprivate and lastprivate
           // variable, in that order.
2841       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2842       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2843       CallArgs.push_back(PrivatesPtr);
2844       for (auto *E : Data.PrivateVars) {
2845         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2846         Address PrivatePtr = CGF.CreateMemTemp(
2847             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2848         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2849         CallArgs.push_back(PrivatePtr.getPointer());
2850       }
2851       for (auto *E : Data.FirstprivateVars) {
2852         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2853         Address PrivatePtr =
2854             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2855                               ".firstpriv.ptr.addr");
2856         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2857         CallArgs.push_back(PrivatePtr.getPointer());
2858       }
2859       for (auto *E : Data.LastprivateVars) {
2860         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2861         Address PrivatePtr =
2862             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2863                               ".lastpriv.ptr.addr");
2864         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2865         CallArgs.push_back(PrivatePtr.getPointer());
2866       }
2867       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2868                                                           CopyFn, CallArgs);
           // Map each lastprivate destination declaration to the address of the
           // original variable, obtained by emitting a reference to it.
2869       for (auto &&Pair : LastprivateDstsOrigs) {
2870         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2871         DeclRefExpr DRE(
2872             const_cast<VarDecl *>(OrigVD),
2873             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2874                 OrigVD) != nullptr,
2875             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2876         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2877           return CGF.EmitLValue(&DRE).getAddress();
2878         });
2879       }
           // Replace each variable by the pointer loaded from its temporary
           // slot, using the declaration's alignment.
2880       for (auto &&Pair : PrivatePtrs) {
2881         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2882                             CGF.getContext().getDeclAlign(Pair.first));
2883         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2884       }
2885     }
2886     if (Data.Reductions) {
2887       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
2888       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2889                              Data.ReductionOps);
           // NOTE(review): parameter index 9 is hard-coded; presumably it is the
           // reductions pointer of the outlined task function - confirm against
           // the captured declaration's parameter list.
2890       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2891           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2892       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2893         RedCG.emitSharedLValue(CGF, Cnt);
2894         RedCG.emitAggregateType(CGF, Cnt);
2895         // FIXME: This must be removed once the runtime library is fixed.
2896         // Emit required threadprivate variables for
2897         // initializer/combiner/finalizer.
2898         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2899                                                            RedCG, Cnt);
2900         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2901             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
             // Convert the returned void pointer to a pointer to the type of the
             // private copy, keeping the alignment.
2902         Replacement =
2903             Address(CGF.EmitScalarConversion(
2904                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2905                         CGF.getContext().getPointerType(
2906                             Data.ReductionCopies[Cnt]->getType()),
2907                         Data.ReductionCopies[Cnt]->getExprLoc()),
2908                     Replacement.getAlignment());
2909         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2910         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2911                          [Replacement]() { return Replacement; });
2912       }
2913     }
2914     // Privatize all private variables except for in_reduction items.
2915     (void)Scope.Privatize();
2916     SmallVector<const Expr *, 4> InRedVars;
2917     SmallVector<const Expr *, 4> InRedPrivs;
2918     SmallVector<const Expr *, 4> InRedOps;
2919     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2920     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2921       auto IPriv = C->privates().begin();
2922       auto IRed = C->reduction_ops().begin();
2923       auto ITD = C->taskgroup_descriptors().begin();
2924       for (const auto *Ref : C->varlists()) {
2925         InRedVars.emplace_back(Ref);
2926         InRedPrivs.emplace_back(*IPriv);
2927         InRedOps.emplace_back(*IRed);
2928         TaskgroupDescriptors.emplace_back(*ITD);
2929         std::advance(IPriv, 1);
2930         std::advance(IRed, 1);
2931         std::advance(ITD, 1);
2932       }
2933     }
2934     // Privatize in_reduction items here, because taskgroup descriptors must be
2935     // privatized earlier.
2936     OMPPrivateScope InRedScope(CGF);
2937     if (!InRedVars.empty()) {
2938       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2939       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2940         RedCG.emitSharedLValue(CGF, Cnt);
2941         RedCG.emitAggregateType(CGF, Cnt);
2942         // The taskgroup descriptor variable is always implicit firstprivate and
2943         // privatized already during processing of the firstprivates.
2944         // FIXME: This must be removed once the runtime library is fixed.
2945         // Emit required threadprivate variables for
2946         // initializer/combiner/finalizer.
2947         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2948                                                            RedCG, Cnt);
             // Here the reductions pointer is loaded from the item's taskgroup
             // descriptor expression rather than from a function parameter.
2949         llvm::Value *ReductionsPtr =
2950             CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
2951                                  TaskgroupDescriptors[Cnt]->getExprLoc());
2952         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2953             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2954         Replacement = Address(
2955             CGF.EmitScalarConversion(
2956                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2957                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2958                 InRedPrivs[Cnt]->getExprLoc()),
2959             Replacement.getAlignment());
2960         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2961         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2962                               [Replacement]() { return Replacement; });
2963       }
2964     }
2965     (void)InRedScope.Privatize();
2966 
2967     Action.Enter(CGF);
2968     BodyGen(CGF);
2969   };
2970   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2971       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2972       Data.NumberOfParts);
       // The directive's lexical scope is opened only after the outlined
       // function has been built, and covers the TaskGen call.
2973   OMPLexicalScope Scope(*this, S);
2974   TaskGen(*this, OutlinedFn, Data);
2975 }
2976 
/// Creates an implicit firstprivate entry of type \p Ty and registers it in
/// \p Data.
///
/// Three implicit parameter declarations are created in \p CD at \p Loc, each
/// with a DeclRefExpr referring to it: the "original" variable (type \p Ty),
/// its private copy (type \p Ty) and an init variable whose type is the base
/// element type of \p Ty. The private copy gets a C-style initializer that is
/// an lvalue-to-rvalue cast of the init reference. The three references are
/// appended to Data.FirstprivateVars, Data.FirstprivateCopies and
/// Data.FirstprivateInits respectively.
///
/// \param C    AST context used to create the declarations and expressions.
/// \param Data Task data whose firstprivate lists are extended.
/// \param Ty   Type of the original variable and of its private copy.
/// \param CD   Captured declaration used as the declaration context.
/// \param Loc  Source location given to the created declarations.
/// \returns The declaration created for the original variable.
2977 static ImplicitParamDecl *
2978 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
2979                                   QualType Ty, CapturedDecl *CD,
2980                                   SourceLocation Loc) {
2981   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2982                                            ImplicitParamDecl::Other);
2983   auto *OrigRef = DeclRefExpr::Create(
2984       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
2985       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
2986   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2987                                               ImplicitParamDecl::Other);
2988   auto *PrivateRef = DeclRefExpr::Create(
2989       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
2990       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
       // The init variable uses the base element type of Ty (for array types,
       // the element type).
2991   QualType ElemType = C.getBaseElementType(Ty);
2992   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
2993                                            ImplicitParamDecl::Other);
2994   auto *InitRef = DeclRefExpr::Create(
2995       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
2996       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
2997   PrivateVD->setInitStyle(VarDecl::CInit);
2998   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
2999                                               InitRef, /*BasePath=*/nullptr,
3000                                               VK_RValue));
3001   Data.FirstprivateVars.emplace_back(OrigRef);
3002   Data.FirstprivateCopies.emplace_back(PrivateRef);
3003   Data.FirstprivateInits.emplace_back(InitRef);
3004   return OrigVD;
3005 }
3006 
/// Emits a target directive as a task.
///
/// Builds the outlined task function for the OMPD_task captured statement of
/// \p S and emits the task call. The task is marked as not final and is
/// outlined as tied. Explicit firstprivate clause items are collected into the
/// task data. When InputInfo.NumberOfTargetItems is greater than zero, three
/// additional implicit firstprivate arrays (two arrays of void pointers and
/// one array of size_t, each with NumberOfTargetItems elements) are created
/// and bound to InputInfo.BasePointersArray, InputInfo.PointersArray and
/// InputInfo.SizesArray. Inside the outlined function those InputInfo members
/// are redirected to the task-local copies before \p BodyGen runs.
///
/// \param S         Directive being emitted.
/// \param BodyGen   Emits the body inside the outlined task function.
/// \param InputInfo Target data arrays; its array addresses are overwritten
///                  while the outlined function body is generated.
3007 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
3008     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
3009     OMPTargetDataInfo &InputInfo) {
3010   // Emit outlined function for task construct.
3011   auto CS = S.getCapturedStmt(OMPD_task);
3012   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3013   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
       // Parameters of the captured declaration are addressed by position: I is
       // the first one, PartId the second one and TaskT the fifth one (index 4).
3014   auto *I = CS->getCapturedDecl()->param_begin();
3015   auto *PartId = std::next(I);
3016   auto *TaskT = std::next(I, 4);
3017   OMPTaskDataTy Data;
3018   // The task is not final.
3019   Data.Final.setInt(/*IntVal=*/false);
3020   // Get list of firstprivate variables.
3021   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3022     auto IRef = C->varlist_begin();
3023     auto IElemInitRef = C->inits().begin();
3024     for (auto *IInit : C->private_copies()) {
3025       Data.FirstprivateVars.push_back(*IRef);
3026       Data.FirstprivateCopies.push_back(IInit);
3027       Data.FirstprivateInits.push_back(*IElemInitRef);
3028       ++IRef;
3029       ++IElemInitRef;
3030     }
3031   }
3032   OMPPrivateScope TargetScope(*this);
3033   VarDecl *BPVD = nullptr;
3034   VarDecl *PVD = nullptr;
3035   VarDecl *SVD = nullptr;
3036   if (InputInfo.NumberOfTargetItems > 0) {
         // Create implicit firstprivate arrays for the base pointers, pointers
         // and sizes, and map them to the arrays currently held in InputInfo.
3037     auto *CD = CapturedDecl::Create(
3038         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
3039     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
3040     QualType BaseAndPointersType = getContext().getConstantArrayType(
3041         getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
3042         /*IndexTypeQuals=*/0);
3043     BPVD = createImplicitFirstprivateForType(
3044         getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
3045     PVD = createImplicitFirstprivateForType(
3046         getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
3047     QualType SizesType = getContext().getConstantArrayType(
3048         getContext().getSizeType(), ArrSize, ArrayType::Normal,
3049         /*IndexTypeQuals=*/0);
3050     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
3051                                             S.getLocStart());
3052     TargetScope.addPrivate(
3053         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
3054     TargetScope.addPrivate(PVD,
3055                            [&InputInfo]() { return InputInfo.PointersArray; });
3056     TargetScope.addPrivate(SVD,
3057                            [&InputInfo]() { return InputInfo.SizesArray; });
3058   }
3059   (void)TargetScope.Privatize();
3060   // Build list of dependences.
3061   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
3062     for (auto *IRef : C->varlists())
3063       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
       // Body generator for the outlined task function.
3064   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
3065                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
3066     // Set proper addresses for generated private copies.
3067     OMPPrivateScope Scope(CGF);
3068     if (!Data.FirstprivateVars.empty()) {
           // NOTE(review): PrivatesParam and CopyFnParam are declared but the
           // literal indices 2 and 3 are used below; consider using the enum
           // constants.
3069       enum { PrivatesParam = 2, CopyFnParam = 3 };
3070       auto *CopyFn = CGF.Builder.CreateLoad(
3071           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
3072       auto *PrivatesPtr = CGF.Builder.CreateLoad(
3073           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
3074       // Map privates.
3075       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3076       llvm::SmallVector<llvm::Value *, 16> CallArgs;
3077       CallArgs.push_back(PrivatesPtr);
3078       for (auto *E : Data.FirstprivateVars) {
3079         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3080         Address PrivatePtr =
3081             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3082                               ".firstpriv.ptr.addr");
3083         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
3084         CallArgs.push_back(PrivatePtr.getPointer());
3085       }
3086       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3087                                                           CopyFn, CallArgs);
3088       for (auto &&Pair : PrivatePtrs) {
3089         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3090                             CGF.getContext().getDeclAlign(Pair.first));
3091         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
3092       }
3093     }
3094     // Privatize all private variables except for in_reduction items.
         // NOTE(review): no in_reduction handling is visible in this function;
         // the comment above may be a leftover - confirm.
3095     (void)Scope.Privatize();
3096     if (InputInfo.NumberOfTargetItems > 0) {
           // Redirect the InputInfo arrays to element 0 of the privatized
           // copies inside the outlined function.
3097       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
3098           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
3099       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
3100           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
3101       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
3102           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
3103     }
3104 
3105     Action.Enter(CGF);
3106     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
3107     BodyGen(CGF);
3108   };
3109   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
3110       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
3111       Data.NumberOfParts);
       // The 'if' condition handed to the task call is a synthesized unsigned
       // 32-bit integer literal: 1 when the directive has a 'nowait' clause, 0
       // otherwise.
3112   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
3113   IntegerLiteral IfCond(getContext(), TrueOrFalse,
3114                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3115                         SourceLocation());
3116 
3117   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn,
3118                                       SharedsTy, CapturedStruct, &IfCond, Data);
3119 }
3120 
/// Emits code for a '#pragma omp task' directive.
///
/// Determines the applicable 'if' condition and whether the task is tied, then
/// delegates to EmitOMPTaskBasedDirective. The body callback emits the
/// captured statement of the OMPD_task region; the task callback emits the
/// runtime task call through emitTaskCall.
///
/// \param S The 'task' directive to emit.
3121 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
3122   // Emit outlined function for task construct.
3123   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
3124   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3125   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
       // Use the condition of the first 'if' clause whose name modifier is
       // OMPD_unknown (presumably: no modifier given) or OMPD_task.
3126   const Expr *IfCond = nullptr;
3127   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3128     if (C->getNameModifier() == OMPD_unknown ||
3129         C->getNameModifier() == OMPD_task) {
3130       IfCond = C->getCondition();
3131       break;
3132     }
3133   }
3134 
3135   OMPTaskDataTy Data;
3136   // Check if we should emit tied or untied task.
3137   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
3138   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3139     CGF.EmitStmt(CS->getCapturedStmt());
3140   };
3141   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
3142                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
3143                             const OMPTaskDataTy &Data) {
3144     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
3145                                             SharedsTy, CapturedStruct, IfCond,
3146                                             Data);
3147   };
3148   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
3149 }
3150 
/// Emits code for a '#pragma omp taskyield' directive by forwarding to the
/// OpenMP runtime's emitTaskyieldCall at the directive's start location.
///
/// \param S The 'taskyield' directive to emit.
3151 void CodeGenFunction::EmitOMPTaskyieldDirective(
3152     const OMPTaskyieldDirective &S) {
3153   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
3154 }
3155 
/// Emits code for a '#pragma omp barrier' directive by forwarding to the
/// OpenMP runtime's emitBarrierCall with kind OMPD_barrier at the directive's
/// start location.
///
/// \param S The 'barrier' directive to emit.
3156 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
3157   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
3158 }
3159 
/// Emits code for a '#pragma omp taskwait' directive by forwarding to the
/// OpenMP runtime's emitTaskwaitCall at the directive's start location.
///
/// \param S The 'taskwait' directive to emit.
3160 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
3161   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
3162 }
3163 
/// Emits code for a '#pragma omp taskgroup' directive.
///
/// The region body is emitted through the runtime's emitTaskgroupRegion. If
/// the directive has a reduction reference (S.getReductionRef() is non-null),
/// the items of its task_reduction clauses are collected, the task reduction
/// is initialized via emitTaskReductionInit, and the resulting descriptor is
/// stored into the variable named by the reduction reference before the body
/// statement is emitted.
///
/// \param S The 'taskgroup' directive to emit.
3164 void CodeGenFunction::EmitOMPTaskgroupDirective(
3165     const OMPTaskgroupDirective &S) {
3166   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3167     Action.Enter(CGF);
3168     if (const Expr *E = S.getReductionRef()) {
3169       SmallVector<const Expr *, 4> LHSs;
3170       SmallVector<const Expr *, 4> RHSs;
3171       OMPTaskDataTy Data;
           // Gather the task_reduction items; the per-item iterators are
           // advanced in lock-step with the variable list.
3172       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
3173         auto IPriv = C->privates().begin();
3174         auto IRed = C->reduction_ops().begin();
3175         auto ILHS = C->lhs_exprs().begin();
3176         auto IRHS = C->rhs_exprs().begin();
3177         for (const auto *Ref : C->varlists()) {
3178           Data.ReductionVars.emplace_back(Ref);
3179           Data.ReductionCopies.emplace_back(*IPriv);
3180           Data.ReductionOps.emplace_back(*IRed);
3181           LHSs.emplace_back(*ILHS);
3182           RHSs.emplace_back(*IRHS);
3183           std::advance(IPriv, 1);
3184           std::advance(IRed, 1);
3185           std::advance(ILHS, 1);
3186           std::advance(IRHS, 1);
3187         }
3188       }
3189       llvm::Value *ReductionDesc =
3190           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
3191                                                            LHSs, RHSs, Data);
           // Emit the variable referenced by the reduction reference and store
           // the reduction descriptor into it.
3192       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3193       CGF.EmitVarDecl(*VD);
3194       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
3195                             /*Volatile=*/false, E->getType());
3196     }
3197     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3198   };
3199   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3200   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3201 }
3202 
/// Emits code for a '#pragma omp flush' directive.
///
/// Forwards to the OpenMP runtime's emitFlush, passing the variable list of
/// the directive's flush clause when one is present and llvm::None otherwise.
///
/// \param S The 'flush' directive to emit.
3203 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
       // The immediately-invoked lambda computes the list argument in place.
3204   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3205     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3206       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3207                                 FlushClause->varlist_end());
3208     }
3209     return llvm::None;
3210   }(), S.getLocStart());
3211 }
3212 
3213 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3214                                             const CodeGenLoopTy &CodeGenLoop,
3215                                             Expr *IncExpr) {
3216   // Emit the loop iteration variable.
3217   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3218   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3219   EmitVarDecl(*IVDecl);
3220 
3221   // Emit the iterations count variable.
3222   // If it is not a variable, Sema decided to calculate iterations count on each
3223   // iteration (e.g., it is foldable into a constant).
3224   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3225     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3226     // Emit calculation of the iterations count.
3227     EmitIgnoredExpr(S.getCalcLastIteration());
3228   }
3229 
3230   auto &RT = CGM.getOpenMPRuntime();
3231 
3232   bool HasLastprivateClause = false;
3233   // Check pre-condition.
3234   {
3235     OMPLoopScope PreInitScope(*this, S);
3236     // Skip the entire loop if we don't meet the precondition.
3237     // If the condition constant folds and can be elided, avoid emitting the
3238     // whole loop.
3239     bool CondConstant;
3240     llvm::BasicBlock *ContBlock = nullptr;
3241     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3242       if (!CondConstant)
3243         return;
3244     } else {
3245       auto *ThenBlock = createBasicBlock("omp.precond.then");
3246       ContBlock = createBasicBlock("omp.precond.end");
3247       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3248                   getProfileCount(&S));
3249       EmitBlock(ThenBlock);
3250       incrementProfileCounter(&S);
3251     }
3252 
3253     emitAlignedClause(*this, S);
3254     // Emit 'then' code.
3255     {
3256       // Emit helper vars inits.
3257 
3258       LValue LB = EmitOMPHelperVar(
3259           *this, cast<DeclRefExpr>(
3260                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3261                           ? S.getCombinedLowerBoundVariable()
3262                           : S.getLowerBoundVariable())));
3263       LValue UB = EmitOMPHelperVar(
3264           *this, cast<DeclRefExpr>(
3265                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3266                           ? S.getCombinedUpperBoundVariable()
3267                           : S.getUpperBoundVariable())));
3268       LValue ST =
3269           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3270       LValue IL =
3271           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3272 
3273       OMPPrivateScope LoopScope(*this);
3274       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3275         // Emit implicit barrier to synchronize threads and avoid data races
3276         // on initialization of firstprivate variables and post-update of
3277         // lastprivate variables.
3278         CGM.getOpenMPRuntime().emitBarrierCall(
3279             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3280             /*ForceSimpleCall=*/true);
3281       }
3282       EmitOMPPrivateClause(S, LoopScope);
3283       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3284           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3285           !isOpenMPTeamsDirective(S.getDirectiveKind()))
3286         EmitOMPReductionClauseInit(S, LoopScope);
3287       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3288       EmitOMPPrivateLoopCounters(S, LoopScope);
3289       (void)LoopScope.Privatize();
3290 
3291       // Detect the distribute schedule kind and chunk.
3292       llvm::Value *Chunk = nullptr;
3293       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3294       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3295         ScheduleKind = C->getDistScheduleKind();
3296         if (const auto *Ch = C->getChunkSize()) {
3297           Chunk = EmitScalarExpr(Ch);
3298           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3299                                        S.getIterationVariable()->getType(),
3300                                        S.getLocStart());
3301         }
3302       }
3303       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3304       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3305 
3306       // OpenMP [2.10.8, distribute Construct, Description]
3307       // If dist_schedule is specified, kind must be static. If specified,
3308       // iterations are divided into chunks of size chunk_size, chunks are
3309       // assigned to the teams of the league in a round-robin fashion in the
3310       // order of the team number. When no chunk_size is specified, the
3311       // iteration space is divided into chunks that are approximately equal
3312       // in size, and at most one chunk is distributed to each team of the
3313       // league. The size of the chunks is unspecified in this case.
3314       if (RT.isStaticNonchunked(ScheduleKind,
3315                                 /* Chunked */ Chunk != nullptr)) {
3316         if (isOpenMPSimdDirective(S.getDirectiveKind()))
3317           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
3318         CGOpenMPRuntime::StaticRTInput StaticInit(
3319             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3320             LB.getAddress(), UB.getAddress(), ST.getAddress());
3321         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3322                                     StaticInit);
3323         auto LoopExit =
3324             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3325         // UB = min(UB, GlobalUB);
3326         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3327                             ? S.getCombinedEnsureUpperBound()
3328                             : S.getEnsureUpperBound());
3329         // IV = LB;
3330         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3331                             ? S.getCombinedInit()
3332                             : S.getInit());
3333 
3334         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3335                          ? S.getCombinedCond()
3336                          : S.getCond();
3337 
3338         // for distribute alone,  codegen
3339         // while (idx <= UB) { BODY; ++idx; }
3340         // when combined with 'for' (e.g. as in 'distribute parallel for')
3341         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3342         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3343                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3344                            CodeGenLoop(CGF, S, LoopExit);
3345                          },
3346                          [](CodeGenFunction &) {});
3347         EmitBlock(LoopExit.getBlock());
3348         // Tell the runtime we are done.
3349         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3350       } else {
3351         // Emit the outer loop, which requests its work chunk [LB..UB] from
3352         // runtime and runs the inner loop to process it.
3353         const OMPLoopArguments LoopArguments = {
3354             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3355             Chunk};
3356         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3357                                    CodeGenLoop);
3358       }
3359       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3360         EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3361           return CGF.Builder.CreateIsNotNull(
3362               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3363         });
3364       }
3365       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3366           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3367           !isOpenMPTeamsDirective(S.getDirectiveKind())) {
3368         OpenMPDirectiveKind ReductionKind = OMPD_unknown;
3369         if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
3370             isOpenMPSimdDirective(S.getDirectiveKind())) {
3371           ReductionKind = OMPD_parallel_for_simd;
3372         } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3373           ReductionKind = OMPD_parallel_for;
3374         } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3375           ReductionKind = OMPD_simd;
3376         } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
3377                    S.hasClausesOfKind<OMPReductionClause>()) {
3378           llvm_unreachable(
3379               "No reduction clauses is allowed in distribute directive.");
3380         }
3381         EmitOMPReductionClauseFinal(S, ReductionKind);
3382         // Emit post-update of the reduction variables if IsLastIter != 0.
3383         emitPostUpdateForReductionClause(
3384             *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3385               return CGF.Builder.CreateIsNotNull(
3386                   CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3387             });
3388       }
3389       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3390       if (HasLastprivateClause) {
3391         EmitOMPLastprivateClauseFinal(
3392             S, /*NoFinals=*/false,
3393             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
3394       }
3395     }
3396 
3397     // We're now done with the loop, so jump to the continuation block.
3398     if (ContBlock) {
3399       EmitBranch(ContBlock);
3400       EmitBlock(ContBlock, true);
3401     }
3402   }
3403 }
3404 
3405 void CodeGenFunction::EmitOMPDistributeDirective(
3406     const OMPDistributeDirective &S) {
3407   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3408 
3409     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3410   };
3411   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3412   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3413 }
3414 
3415 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3416                                                    const CapturedStmt *S) {
3417   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3418   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3419   CGF.CapturedStmtInfo = &CapStmtInfo;
3420   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3421   Fn->addFnAttr(llvm::Attribute::NoInline);
3422   return Fn;
3423 }
3424 
3425 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3426   if (S.hasClausesOfKind<OMPDependClause>()) {
3427     assert(!S.getAssociatedStmt() &&
3428            "No associated statement must be in ordered depend construct.");
3429     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3430       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3431     return;
3432   }
3433   auto *C = S.getSingleClause<OMPSIMDClause>();
3434   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3435                                  PrePostActionTy &Action) {
3436     const CapturedStmt *CS = S.getInnermostCapturedStmt();
3437     if (C) {
3438       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3439       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3440       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3441       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3442                                                       OutlinedFn, CapturedVars);
3443     } else {
3444       Action.Enter(CGF);
3445       CGF.EmitStmt(CS->getCapturedStmt());
3446     }
3447   };
3448   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3449   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3450 }
3451 
3452 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3453                                          QualType SrcType, QualType DestType,
3454                                          SourceLocation Loc) {
3455   assert(CGF.hasScalarEvaluationKind(DestType) &&
3456          "DestType must have scalar evaluation kind.");
3457   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3458   return Val.isScalar()
3459              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3460                                         Loc)
3461              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3462                                                  DestType, Loc);
3463 }
3464 
3465 static CodeGenFunction::ComplexPairTy
3466 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3467                       QualType DestType, SourceLocation Loc) {
3468   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3469          "DestType must have complex evaluation kind.");
3470   CodeGenFunction::ComplexPairTy ComplexVal;
3471   if (Val.isScalar()) {
3472     // Convert the input element to the element type of the complex.
3473     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3474     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3475                                               DestElementType, Loc);
3476     ComplexVal = CodeGenFunction::ComplexPairTy(
3477         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3478   } else {
3479     assert(Val.isComplex() && "Must be a scalar or complex.");
3480     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3481     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3482     ComplexVal.first = CGF.EmitScalarConversion(
3483         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3484     ComplexVal.second = CGF.EmitScalarConversion(
3485         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3486   }
3487   return ComplexVal;
3488 }
3489 
3490 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3491                                   LValue LVal, RValue RVal) {
3492   if (LVal.isGlobalReg()) {
3493     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3494   } else {
3495     CGF.EmitAtomicStore(RVal, LVal,
3496                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3497                                  : llvm::AtomicOrdering::Monotonic,
3498                         LVal.isVolatile(), /*IsInit=*/false);
3499   }
3500 }
3501 
3502 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3503                                          QualType RValTy, SourceLocation Loc) {
3504   switch (getEvaluationKind(LVal.getType())) {
3505   case TEK_Scalar:
3506     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3507                                *this, RVal, RValTy, LVal.getType(), Loc)),
3508                            LVal);
3509     break;
3510   case TEK_Complex:
3511     EmitStoreOfComplex(
3512         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3513         /*isInit=*/false);
3514     break;
3515   case TEK_Aggregate:
3516     llvm_unreachable("Must be a scalar or complex.");
3517   }
3518 }
3519 
3520 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3521                                   const Expr *X, const Expr *V,
3522                                   SourceLocation Loc) {
3523   // v = x;
3524   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3525   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3526   LValue XLValue = CGF.EmitLValue(X);
3527   LValue VLValue = CGF.EmitLValue(V);
3528   RValue Res = XLValue.isGlobalReg()
3529                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3530                    : CGF.EmitAtomicLoad(
3531                          XLValue, Loc,
3532                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3533                                   : llvm::AtomicOrdering::Monotonic,
3534                          XLValue.isVolatile());
3535   // OpenMP, 2.12.6, atomic Construct
3536   // Any atomic construct with a seq_cst clause forces the atomically
3537   // performed operation to include an implicit flush operation without a
3538   // list.
3539   if (IsSeqCst)
3540     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3541   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3542 }
3543 
3544 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3545                                    const Expr *X, const Expr *E,
3546                                    SourceLocation Loc) {
3547   // x = expr;
3548   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3549   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3550   // OpenMP, 2.12.6, atomic Construct
3551   // Any atomic construct with a seq_cst clause forces the atomically
3552   // performed operation to include an implicit flush operation without a
3553   // list.
3554   if (IsSeqCst)
3555     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3556 }
3557 
3558 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3559                                                 RValue Update,
3560                                                 BinaryOperatorKind BO,
3561                                                 llvm::AtomicOrdering AO,
3562                                                 bool IsXLHSInRHSPart) {
3563   auto &Context = CGF.CGM.getContext();
3564   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3565   // expression is simple and atomic is allowed for the given type for the
3566   // target platform.
3567   if (BO == BO_Comma || !Update.isScalar() ||
3568       !Update.getScalarVal()->getType()->isIntegerTy() ||
3569       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3570                         (Update.getScalarVal()->getType() !=
3571                          X.getAddress().getElementType())) ||
3572       !X.getAddress().getElementType()->isIntegerTy() ||
3573       !Context.getTargetInfo().hasBuiltinAtomic(
3574           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3575     return std::make_pair(false, RValue::get(nullptr));
3576 
3577   llvm::AtomicRMWInst::BinOp RMWOp;
3578   switch (BO) {
3579   case BO_Add:
3580     RMWOp = llvm::AtomicRMWInst::Add;
3581     break;
3582   case BO_Sub:
3583     if (!IsXLHSInRHSPart)
3584       return std::make_pair(false, RValue::get(nullptr));
3585     RMWOp = llvm::AtomicRMWInst::Sub;
3586     break;
3587   case BO_And:
3588     RMWOp = llvm::AtomicRMWInst::And;
3589     break;
3590   case BO_Or:
3591     RMWOp = llvm::AtomicRMWInst::Or;
3592     break;
3593   case BO_Xor:
3594     RMWOp = llvm::AtomicRMWInst::Xor;
3595     break;
3596   case BO_LT:
3597     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3598                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3599                                    : llvm::AtomicRMWInst::Max)
3600                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3601                                    : llvm::AtomicRMWInst::UMax);
3602     break;
3603   case BO_GT:
3604     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3605                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3606                                    : llvm::AtomicRMWInst::Min)
3607                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3608                                    : llvm::AtomicRMWInst::UMin);
3609     break;
3610   case BO_Assign:
3611     RMWOp = llvm::AtomicRMWInst::Xchg;
3612     break;
3613   case BO_Mul:
3614   case BO_Div:
3615   case BO_Rem:
3616   case BO_Shl:
3617   case BO_Shr:
3618   case BO_LAnd:
3619   case BO_LOr:
3620     return std::make_pair(false, RValue::get(nullptr));
3621   case BO_PtrMemD:
3622   case BO_PtrMemI:
3623   case BO_LE:
3624   case BO_GE:
3625   case BO_EQ:
3626   case BO_NE:
3627   case BO_Cmp:
3628   case BO_AddAssign:
3629   case BO_SubAssign:
3630   case BO_AndAssign:
3631   case BO_OrAssign:
3632   case BO_XorAssign:
3633   case BO_MulAssign:
3634   case BO_DivAssign:
3635   case BO_RemAssign:
3636   case BO_ShlAssign:
3637   case BO_ShrAssign:
3638   case BO_Comma:
3639     llvm_unreachable("Unsupported atomic update operation");
3640   }
3641   auto *UpdateVal = Update.getScalarVal();
3642   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3643     UpdateVal = CGF.Builder.CreateIntCast(
3644         IC, X.getAddress().getElementType(),
3645         X.getType()->hasSignedIntegerRepresentation());
3646   }
3647   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3648   return std::make_pair(true, RValue::get(Res));
3649 }
3650 
3651 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3652     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3653     llvm::AtomicOrdering AO, SourceLocation Loc,
3654     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3655   // Update expressions are allowed to have the following forms:
3656   // x binop= expr; -> xrval + expr;
3657   // x++, ++x -> xrval + 1;
3658   // x--, --x -> xrval - 1;
3659   // x = x binop expr; -> xrval binop expr
3660   // x = expr Op x; - > expr binop xrval;
3661   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3662   if (!Res.first) {
3663     if (X.isGlobalReg()) {
3664       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3665       // 'xrval'.
3666       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3667     } else {
3668       // Perform compare-and-swap procedure.
3669       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3670     }
3671   }
3672   return Res;
3673 }
3674 
3675 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3676                                     const Expr *X, const Expr *E,
3677                                     const Expr *UE, bool IsXLHSInRHSPart,
3678                                     SourceLocation Loc) {
3679   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3680          "Update expr in 'atomic update' must be a binary operator.");
3681   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3682   // Update expressions are allowed to have the following forms:
3683   // x binop= expr; -> xrval + expr;
3684   // x++, ++x -> xrval + 1;
3685   // x--, --x -> xrval - 1;
3686   // x = x binop expr; -> xrval binop expr
3687   // x = expr Op x; - > expr binop xrval;
3688   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3689   LValue XLValue = CGF.EmitLValue(X);
3690   RValue ExprRValue = CGF.EmitAnyExpr(E);
3691   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3692                      : llvm::AtomicOrdering::Monotonic;
3693   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3694   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3695   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3696   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3697   auto Gen =
3698       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3699         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3700         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3701         return CGF.EmitAnyExpr(UE);
3702       };
3703   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3704       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3705   // OpenMP, 2.12.6, atomic Construct
3706   // Any atomic construct with a seq_cst clause forces the atomically
3707   // performed operation to include an implicit flush operation without a
3708   // list.
3709   if (IsSeqCst)
3710     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3711 }
3712 
3713 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3714                             QualType SourceType, QualType ResType,
3715                             SourceLocation Loc) {
3716   switch (CGF.getEvaluationKind(ResType)) {
3717   case TEK_Scalar:
3718     return RValue::get(
3719         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3720   case TEK_Complex: {
3721     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3722     return RValue::getComplex(Res.first, Res.second);
3723   }
3724   case TEK_Aggregate:
3725     break;
3726   }
3727   llvm_unreachable("Must be a scalar or complex.");
3728 }
3729 
3730 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3731                                      bool IsPostfixUpdate, const Expr *V,
3732                                      const Expr *X, const Expr *E,
3733                                      const Expr *UE, bool IsXLHSInRHSPart,
3734                                      SourceLocation Loc) {
3735   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3736   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3737   RValue NewVVal;
3738   LValue VLValue = CGF.EmitLValue(V);
3739   LValue XLValue = CGF.EmitLValue(X);
3740   RValue ExprRValue = CGF.EmitAnyExpr(E);
3741   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3742                      : llvm::AtomicOrdering::Monotonic;
3743   QualType NewVValType;
3744   if (UE) {
3745     // 'x' is updated with some additional value.
3746     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3747            "Update expr in 'atomic capture' must be a binary operator.");
3748     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3749     // Update expressions are allowed to have the following forms:
3750     // x binop= expr; -> xrval + expr;
3751     // x++, ++x -> xrval + 1;
3752     // x--, --x -> xrval - 1;
3753     // x = x binop expr; -> xrval binop expr
3754     // x = expr Op x; - > expr binop xrval;
3755     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3756     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3757     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3758     NewVValType = XRValExpr->getType();
3759     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3760     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3761                   IsPostfixUpdate](RValue XRValue) -> RValue {
3762       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3763       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3764       RValue Res = CGF.EmitAnyExpr(UE);
3765       NewVVal = IsPostfixUpdate ? XRValue : Res;
3766       return Res;
3767     };
3768     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3769         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3770     if (Res.first) {
3771       // 'atomicrmw' instruction was generated.
3772       if (IsPostfixUpdate) {
3773         // Use old value from 'atomicrmw'.
3774         NewVVal = Res.second;
3775       } else {
3776         // 'atomicrmw' does not provide new value, so evaluate it using old
3777         // value of 'x'.
3778         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3779         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3780         NewVVal = CGF.EmitAnyExpr(UE);
3781       }
3782     }
3783   } else {
3784     // 'x' is simply rewritten with some 'expr'.
3785     NewVValType = X->getType().getNonReferenceType();
3786     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3787                                X->getType().getNonReferenceType(), Loc);
3788     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3789       NewVVal = XRValue;
3790       return ExprRValue;
3791     };
3792     // Try to perform atomicrmw xchg, otherwise simple exchange.
3793     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3794         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3795         Loc, Gen);
3796     if (Res.first) {
3797       // 'atomicrmw' instruction was generated.
3798       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3799     }
3800   }
3801   // Emit post-update store to 'v' of old/new 'x' value.
3802   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3803   // OpenMP, 2.12.6, atomic Construct
3804   // Any atomic construct with a seq_cst clause forces the atomically
3805   // performed operation to include an implicit flush operation without a
3806   // list.
3807   if (IsSeqCst)
3808     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3809 }
3810 
3811 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3812                               bool IsSeqCst, bool IsPostfixUpdate,
3813                               const Expr *X, const Expr *V, const Expr *E,
3814                               const Expr *UE, bool IsXLHSInRHSPart,
3815                               SourceLocation Loc) {
3816   switch (Kind) {
3817   case OMPC_read:
3818     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3819     break;
3820   case OMPC_write:
3821     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3822     break;
3823   case OMPC_unknown:
3824   case OMPC_update:
3825     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3826     break;
3827   case OMPC_capture:
3828     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3829                              IsXLHSInRHSPart, Loc);
3830     break;
3831   case OMPC_if:
3832   case OMPC_final:
3833   case OMPC_num_threads:
3834   case OMPC_private:
3835   case OMPC_firstprivate:
3836   case OMPC_lastprivate:
3837   case OMPC_reduction:
3838   case OMPC_task_reduction:
3839   case OMPC_in_reduction:
3840   case OMPC_safelen:
3841   case OMPC_simdlen:
3842   case OMPC_collapse:
3843   case OMPC_default:
3844   case OMPC_seq_cst:
3845   case OMPC_shared:
3846   case OMPC_linear:
3847   case OMPC_aligned:
3848   case OMPC_copyin:
3849   case OMPC_copyprivate:
3850   case OMPC_flush:
3851   case OMPC_proc_bind:
3852   case OMPC_schedule:
3853   case OMPC_ordered:
3854   case OMPC_nowait:
3855   case OMPC_untied:
3856   case OMPC_threadprivate:
3857   case OMPC_depend:
3858   case OMPC_mergeable:
3859   case OMPC_device:
3860   case OMPC_threads:
3861   case OMPC_simd:
3862   case OMPC_map:
3863   case OMPC_num_teams:
3864   case OMPC_thread_limit:
3865   case OMPC_priority:
3866   case OMPC_grainsize:
3867   case OMPC_nogroup:
3868   case OMPC_num_tasks:
3869   case OMPC_hint:
3870   case OMPC_dist_schedule:
3871   case OMPC_defaultmap:
3872   case OMPC_uniform:
3873   case OMPC_to:
3874   case OMPC_from:
3875   case OMPC_use_device_ptr:
3876   case OMPC_is_device_ptr:
3877     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3878   }
3879 }
3880 
3881 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3882   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3883   OpenMPClauseKind Kind = OMPC_unknown;
3884   for (auto *C : S.clauses()) {
3885     // Find first clause (skip seq_cst clause, if it is first).
3886     if (C->getClauseKind() != OMPC_seq_cst) {
3887       Kind = C->getClauseKind();
3888       break;
3889     }
3890   }
3891 
3892   const auto *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
3893   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3894     enterFullExpression(EWC);
3895   }
3896   // Processing for statements under 'atomic capture'.
3897   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3898     for (const auto *C : Compound->body()) {
3899       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3900         enterFullExpression(EWC);
3901       }
3902     }
3903   }
3904 
3905   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3906                                             PrePostActionTy &) {
3907     CGF.EmitStopPoint(CS);
3908     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3909                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3910                       S.isXLHSInRHSPart(), S.getLocStart());
3911   };
3912   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3913   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3914 }
3915 
3916 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3917                                          const OMPExecutableDirective &S,
3918                                          const RegionCodeGenTy &CodeGen) {
3919   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3920   CodeGenModule &CGM = CGF.CGM;
3921 
3922   // On device emit this construct as inlined code.
3923   if (CGM.getLangOpts().OpenMPIsDevice) {
3924     OMPLexicalScope Scope(CGF, S, OMPD_target);
3925     CGM.getOpenMPRuntime().emitInlinedDirective(
3926         CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3927           CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
3928         });
3929     return;
3930   }
3931 
3932   llvm::Function *Fn = nullptr;
3933   llvm::Constant *FnID = nullptr;
3934 
3935   const Expr *IfCond = nullptr;
3936   // Check for the at most one if clause associated with the target region.
3937   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3938     if (C->getNameModifier() == OMPD_unknown ||
3939         C->getNameModifier() == OMPD_target) {
3940       IfCond = C->getCondition();
3941       break;
3942     }
3943   }
3944 
3945   // Check if we have any device clause associated with the directive.
3946   const Expr *Device = nullptr;
3947   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3948     Device = C->getDevice();
3949   }
3950 
3951   // Check if we have an if clause whose conditional always evaluates to false
3952   // or if we do not have any targets specified. If so the target region is not
3953   // an offload entry point.
3954   bool IsOffloadEntry = true;
3955   if (IfCond) {
3956     bool Val;
3957     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3958       IsOffloadEntry = false;
3959   }
3960   if (CGM.getLangOpts().OMPTargetTriples.empty())
3961     IsOffloadEntry = false;
3962 
3963   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3964   StringRef ParentName;
3965   // In case we have Ctors/Dtors we use the complete type variant to produce
3966   // the mangling of the device outlined kernel.
3967   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3968     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3969   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3970     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3971   else
3972     ParentName =
3973         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3974 
3975   // Emit target region as a standalone region.
3976   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3977                                                     IsOffloadEntry, CodeGen);
3978   OMPLexicalScope Scope(CGF, S, OMPD_task);
3979   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
3980 }
3981 
3982 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3983                              PrePostActionTy &Action) {
3984   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3985   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3986   CGF.EmitOMPPrivateClause(S, PrivateScope);
3987   (void)PrivateScope.Privatize();
3988 
3989   Action.Enter(CGF);
3990   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
3991 }
3992 
3993 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3994                                                   StringRef ParentName,
3995                                                   const OMPTargetDirective &S) {
3996   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3997     emitTargetRegion(CGF, S, Action);
3998   };
3999   llvm::Function *Fn;
4000   llvm::Constant *Addr;
4001   // Emit target region as a standalone region.
4002   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4003       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4004   assert(Fn && Addr && "Target device function emission failed.");
4005 }
4006 
4007 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
4008   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4009     emitTargetRegion(CGF, S, Action);
4010   };
4011   emitCommonOMPTargetDirective(*this, S, CodeGen);
4012 }
4013 
4014 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
4015                                         const OMPExecutableDirective &S,
4016                                         OpenMPDirectiveKind InnermostKind,
4017                                         const RegionCodeGenTy &CodeGen) {
4018   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
4019   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
4020       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
4021 
4022   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
4023   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
4024   if (NT || TL) {
4025     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
4026     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
4027 
4028     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
4029                                                   S.getLocStart());
4030   }
4031 
4032   OMPTeamsScope Scope(CGF, S);
4033   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4034   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4035   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
4036                                            CapturedVars);
4037 }
4038 
4039 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
4040   // Emit teams region as a standalone region.
4041   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4042     OMPPrivateScope PrivateScope(CGF);
4043     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4044     CGF.EmitOMPPrivateClause(S, PrivateScope);
4045     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4046     (void)PrivateScope.Privatize();
4047     Action.Enter(CGF);
4048     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
4049     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4050   };
4051   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4052   emitPostUpdateForReductionClause(
4053       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4054 }
4055 
4056 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4057                                   const OMPTargetTeamsDirective &S) {
4058   auto *CS = S.getCapturedStmt(OMPD_teams);
4059   Action.Enter(CGF);
4060   // Emit teams region as a standalone region.
4061   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
4062     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4063     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4064     CGF.EmitOMPPrivateClause(S, PrivateScope);
4065     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4066     (void)PrivateScope.Privatize();
4067     Action.Enter(CGF);
4068     CGF.EmitStmt(CS->getCapturedStmt());
4069     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4070   };
4071   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
4072   emitPostUpdateForReductionClause(
4073       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4074 }
4075 
4076 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
4077     CodeGenModule &CGM, StringRef ParentName,
4078     const OMPTargetTeamsDirective &S) {
4079   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4080     emitTargetTeamsRegion(CGF, Action, S);
4081   };
4082   llvm::Function *Fn;
4083   llvm::Constant *Addr;
4084   // Emit target region as a standalone region.
4085   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4086       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4087   assert(Fn && Addr && "Target device function emission failed.");
4088 }
4089 
4090 void CodeGenFunction::EmitOMPTargetTeamsDirective(
4091     const OMPTargetTeamsDirective &S) {
4092   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4093     emitTargetTeamsRegion(CGF, Action, S);
4094   };
4095   emitCommonOMPTargetDirective(*this, S, CodeGen);
4096 }
4097 
4098 static void
4099 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4100                                 const OMPTargetTeamsDistributeDirective &S) {
4101   Action.Enter(CGF);
4102   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4103     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4104   };
4105 
4106   // Emit teams region as a standalone region.
4107   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4108                                             PrePostActionTy &Action) {
4109     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4110     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4111     (void)PrivateScope.Privatize();
4112     Action.Enter(CGF);
4113     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4114                                                     CodeGenDistribute);
4115     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4116   };
4117   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
4118   emitPostUpdateForReductionClause(CGF, S,
4119                                    [](CodeGenFunction &) { return nullptr; });
4120 }
4121 
4122 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
4123     CodeGenModule &CGM, StringRef ParentName,
4124     const OMPTargetTeamsDistributeDirective &S) {
4125   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4126     emitTargetTeamsDistributeRegion(CGF, Action, S);
4127   };
4128   llvm::Function *Fn;
4129   llvm::Constant *Addr;
4130   // Emit target region as a standalone region.
4131   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4132       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4133   assert(Fn && Addr && "Target device function emission failed.");
4134 }
4135 
4136 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
4137     const OMPTargetTeamsDistributeDirective &S) {
4138   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4139     emitTargetTeamsDistributeRegion(CGF, Action, S);
4140   };
4141   emitCommonOMPTargetDirective(*this, S, CodeGen);
4142 }
4143 
4144 static void emitTargetTeamsDistributeSimdRegion(
4145     CodeGenFunction &CGF, PrePostActionTy &Action,
4146     const OMPTargetTeamsDistributeSimdDirective &S) {
4147   Action.Enter(CGF);
4148   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4149     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4150   };
4151 
4152   // Emit teams region as a standalone region.
4153   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4154                                             PrePostActionTy &Action) {
4155     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4156     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4157     (void)PrivateScope.Privatize();
4158     Action.Enter(CGF);
4159     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4160                                                     CodeGenDistribute);
4161     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4162   };
4163   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
4164   emitPostUpdateForReductionClause(CGF, S,
4165                                    [](CodeGenFunction &) { return nullptr; });
4166 }
4167 
4168 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
4169     CodeGenModule &CGM, StringRef ParentName,
4170     const OMPTargetTeamsDistributeSimdDirective &S) {
4171   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4172     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4173   };
4174   llvm::Function *Fn;
4175   llvm::Constant *Addr;
4176   // Emit target region as a standalone region.
4177   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4178       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4179   assert(Fn && Addr && "Target device function emission failed.");
4180 }
4181 
4182 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
4183     const OMPTargetTeamsDistributeSimdDirective &S) {
4184   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4185     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4186   };
4187   emitCommonOMPTargetDirective(*this, S, CodeGen);
4188 }
4189 
4190 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
4191     const OMPTeamsDistributeDirective &S) {
4192 
4193   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4194     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4195   };
4196 
4197   // Emit teams region as a standalone region.
4198   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4199                                             PrePostActionTy &Action) {
4200     OMPPrivateScope PrivateScope(CGF);
4201     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4202     (void)PrivateScope.Privatize();
4203     Action.Enter(CGF);
4204     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4205                                                     CodeGenDistribute);
4206     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4207   };
4208   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4209   emitPostUpdateForReductionClause(*this, S,
4210                                    [](CodeGenFunction &) { return nullptr; });
4211 }
4212 
4213 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
4214     const OMPTeamsDistributeSimdDirective &S) {
4215   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4216     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4217   };
4218 
4219   // Emit teams region as a standalone region.
4220   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4221                                             PrePostActionTy &Action) {
4222     OMPPrivateScope PrivateScope(CGF);
4223     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4224     (void)PrivateScope.Privatize();
4225     Action.Enter(CGF);
4226     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
4227                                                     CodeGenDistribute);
4228     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4229   };
4230   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
4231   emitPostUpdateForReductionClause(*this, S,
4232                                    [](CodeGenFunction &) { return nullptr; });
4233 }
4234 
4235 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
4236     const OMPTeamsDistributeParallelForDirective &S) {
4237   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4238     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4239                               S.getDistInc());
4240   };
4241 
4242   // Emit teams region as a standalone region.
4243   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4244                                             PrePostActionTy &Action) {
4245     OMPPrivateScope PrivateScope(CGF);
4246     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4247     (void)PrivateScope.Privatize();
4248     Action.Enter(CGF);
4249     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4250                                                     CodeGenDistribute);
4251     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4252   };
4253   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4254   emitPostUpdateForReductionClause(*this, S,
4255                                    [](CodeGenFunction &) { return nullptr; });
4256 }
4257 
4258 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
4259     const OMPTeamsDistributeParallelForSimdDirective &S) {
4260   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4261     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4262                               S.getDistInc());
4263   };
4264 
4265   // Emit teams region as a standalone region.
4266   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4267                                             PrePostActionTy &Action) {
4268     OMPPrivateScope PrivateScope(CGF);
4269     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4270     (void)PrivateScope.Privatize();
4271     Action.Enter(CGF);
4272     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4273         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4274     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4275   };
4276   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4277   emitPostUpdateForReductionClause(*this, S,
4278                                    [](CodeGenFunction &) { return nullptr; });
4279 }
4280 
4281 static void emitTargetTeamsDistributeParallelForRegion(
4282     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
4283     PrePostActionTy &Action) {
4284   Action.Enter(CGF);
4285   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4286     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4287                               S.getDistInc());
4288   };
4289 
4290   // Emit teams region as a standalone region.
4291   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4292                                                  PrePostActionTy &Action) {
4293     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4294     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4295     (void)PrivateScope.Privatize();
4296     Action.Enter(CGF);
4297     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4298         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4299     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4300   };
4301 
4302   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
4303                               CodeGenTeams);
4304   emitPostUpdateForReductionClause(CGF, S,
4305                                    [](CodeGenFunction &) { return nullptr; });
4306 }
4307 
4308 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
4309     CodeGenModule &CGM, StringRef ParentName,
4310     const OMPTargetTeamsDistributeParallelForDirective &S) {
4311   // Emit SPMD target teams distribute parallel for region as a standalone
4312   // region.
4313   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4314     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4315   };
4316   llvm::Function *Fn;
4317   llvm::Constant *Addr;
4318   // Emit target region as a standalone region.
4319   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4320       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4321   assert(Fn && Addr && "Target device function emission failed.");
4322 }
4323 
4324 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
4325     const OMPTargetTeamsDistributeParallelForDirective &S) {
4326   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4327     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4328   };
4329   emitCommonOMPTargetDirective(*this, S, CodeGen);
4330 }
4331 
4332 static void emitTargetTeamsDistributeParallelForSimdRegion(
4333     CodeGenFunction &CGF,
4334     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
4335     PrePostActionTy &Action) {
4336   Action.Enter(CGF);
4337   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4338     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4339                               S.getDistInc());
4340   };
4341 
4342   // Emit teams region as a standalone region.
4343   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4344                                                  PrePostActionTy &Action) {
4345     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4346     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4347     (void)PrivateScope.Privatize();
4348     Action.Enter(CGF);
4349     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4350         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4351     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4352   };
4353 
4354   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
4355                               CodeGenTeams);
4356   emitPostUpdateForReductionClause(CGF, S,
4357                                    [](CodeGenFunction &) { return nullptr; });
4358 }
4359 
4360 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
4361     CodeGenModule &CGM, StringRef ParentName,
4362     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4363   // Emit SPMD target teams distribute parallel for simd region as a standalone
4364   // region.
4365   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4366     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4367   };
4368   llvm::Function *Fn;
4369   llvm::Constant *Addr;
4370   // Emit target region as a standalone region.
4371   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4372       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4373   assert(Fn && Addr && "Target device function emission failed.");
4374 }
4375 
4376 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
4377     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4378   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4379     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4380   };
4381   emitCommonOMPTargetDirective(*this, S, CodeGen);
4382 }
4383 
4384 void CodeGenFunction::EmitOMPCancellationPointDirective(
4385     const OMPCancellationPointDirective &S) {
4386   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
4387                                                    S.getCancelRegion());
4388 }
4389 
4390 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
4391   const Expr *IfCond = nullptr;
4392   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4393     if (C->getNameModifier() == OMPD_unknown ||
4394         C->getNameModifier() == OMPD_cancel) {
4395       IfCond = C->getCondition();
4396       break;
4397     }
4398   }
4399   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
4400                                         S.getCancelRegion());
4401 }
4402 
4403 CodeGenFunction::JumpDest
4404 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
4405   if (Kind == OMPD_parallel || Kind == OMPD_task ||
4406       Kind == OMPD_target_parallel)
4407     return ReturnBlock;
4408   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
4409          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
4410          Kind == OMPD_distribute_parallel_for ||
4411          Kind == OMPD_target_parallel_for ||
4412          Kind == OMPD_teams_distribute_parallel_for ||
4413          Kind == OMPD_target_teams_distribute_parallel_for);
4414   return OMPCancelStack.getExitBlock();
4415 }
4416 
4417 void CodeGenFunction::EmitOMPUseDevicePtrClause(
4418     const OMPClause &NC, OMPPrivateScope &PrivateScope,
4419     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
4420   const auto &C = cast<OMPUseDevicePtrClause>(NC);
4421   auto OrigVarIt = C.varlist_begin();
4422   auto InitIt = C.inits().begin();
4423   for (auto PvtVarIt : C.private_copies()) {
4424     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
4425     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
4426     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
4427 
4428     // In order to identify the right initializer we need to match the
4429     // declaration used by the mapping logic. In some cases we may get
4430     // OMPCapturedExprDecl that refers to the original declaration.
4431     const ValueDecl *MatchingVD = OrigVD;
4432     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
4433       // OMPCapturedExprDecl are used to privative fields of the current
4434       // structure.
4435       auto *ME = cast<MemberExpr>(OED->getInit());
4436       assert(isa<CXXThisExpr>(ME->getBase()) &&
4437              "Base should be the current struct!");
4438       MatchingVD = ME->getMemberDecl();
4439     }
4440 
4441     // If we don't have information about the current list item, move on to
4442     // the next one.
4443     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
4444     if (InitAddrIt == CaptureDeviceAddrMap.end())
4445       continue;
4446 
4447     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
4448       // Initialize the temporary initialization variable with the address we
4449       // get from the runtime library. We have to cast the source address
4450       // because it is always a void *. References are materialized in the
4451       // privatization scope, so the initialization here disregards the fact
4452       // the original variable is a reference.
4453       QualType AddrQTy =
4454           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
4455       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
4456       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
4457       setAddrOfLocalVar(InitVD, InitAddr);
4458 
4459       // Emit private declaration, it will be initialized by the value we
4460       // declaration we just added to the local declarations map.
4461       EmitDecl(*PvtVD);
4462 
4463       // The initialization variables reached its purpose in the emission
4464       // ofthe previous declaration, so we don't need it anymore.
4465       LocalDeclMap.erase(InitVD);
4466 
4467       // Return the address of the private variable.
4468       return GetAddrOfLocalVar(PvtVD);
4469     });
4470     assert(IsRegistered && "firstprivate var already registered as private");
4471     // Silence the warning about unused variable.
4472     (void)IsRegistered;
4473 
4474     ++OrigVarIt;
4475     ++InitIt;
4476   }
4477 }
4478 
// Generate the instructions for '#pragma omp target data' directive.
//
// The region body is always emitted inline. When no offload target triples
// are configured only the body is emitted; otherwise the body is handed to the
// OpenMP runtime's emitTargetDataCalls together with the expressions of the
// 'if' and 'device' clauses (null when absent). use_device_ptr list items are
// privatized around the body only if the runtime invokes the pre-action that
// is attached to the region code generator below.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  // Action whose Enter() only raises the flag it was constructed with.
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  // Generator for the whole region: wraps the body in a lexical scope and
  // emits it as an inlined OMPD_target_data directive.
  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emits the statement of the innermost captured region of the directive.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      // Reset the flag so that only the action entered below can enable
      // privatization for this emission of the body.
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        // The body is emitted while the private scope is still alive.
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    // No action has been set on RCG at this point.
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
4567 
4568 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4569     const OMPTargetEnterDataDirective &S) {
4570   // If we don't have target devices, don't bother emitting the data mapping
4571   // code.
4572   if (CGM.getLangOpts().OMPTargetTriples.empty())
4573     return;
4574 
4575   // Check if we have any if clause associated with the directive.
4576   const Expr *IfCond = nullptr;
4577   if (auto *C = S.getSingleClause<OMPIfClause>())
4578     IfCond = C->getCondition();
4579 
4580   // Check if we have any device clause associated with the directive.
4581   const Expr *Device = nullptr;
4582   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4583     Device = C->getDevice();
4584 
4585   OMPLexicalScope Scope(*this, S, OMPD_task);
4586   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4587 }
4588 
4589 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4590     const OMPTargetExitDataDirective &S) {
4591   // If we don't have target devices, don't bother emitting the data mapping
4592   // code.
4593   if (CGM.getLangOpts().OMPTargetTriples.empty())
4594     return;
4595 
4596   // Check if we have any if clause associated with the directive.
4597   const Expr *IfCond = nullptr;
4598   if (auto *C = S.getSingleClause<OMPIfClause>())
4599     IfCond = C->getCondition();
4600 
4601   // Check if we have any device clause associated with the directive.
4602   const Expr *Device = nullptr;
4603   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4604     Device = C->getDevice();
4605 
4606   OMPLexicalScope Scope(*this, S, OMPD_task);
4607   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4608 }
4609 
4610 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4611                                      const OMPTargetParallelDirective &S,
4612                                      PrePostActionTy &Action) {
4613   // Get the captured statement associated with the 'parallel' region.
4614   auto *CS = S.getCapturedStmt(OMPD_parallel);
4615   Action.Enter(CGF);
4616   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
4617     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4618     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4619     CGF.EmitOMPPrivateClause(S, PrivateScope);
4620     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4621     (void)PrivateScope.Privatize();
4622     Action.Enter(CGF);
4623     // TODO: Add support for clauses.
4624     CGF.EmitStmt(CS->getCapturedStmt());
4625     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4626   };
4627   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4628                                  emitEmptyBoundParameters);
4629   emitPostUpdateForReductionClause(
4630       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4631 }
4632 
4633 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4634     CodeGenModule &CGM, StringRef ParentName,
4635     const OMPTargetParallelDirective &S) {
4636   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4637     emitTargetParallelRegion(CGF, S, Action);
4638   };
4639   llvm::Function *Fn;
4640   llvm::Constant *Addr;
4641   // Emit target region as a standalone region.
4642   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4643       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4644   assert(Fn && Addr && "Target device function emission failed.");
4645 }
4646 
4647 void CodeGenFunction::EmitOMPTargetParallelDirective(
4648     const OMPTargetParallelDirective &S) {
4649   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4650     emitTargetParallelRegion(CGF, S, Action);
4651   };
4652   emitCommonOMPTargetDirective(*this, S, CodeGen);
4653 }
4654 
4655 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4656                                         const OMPTargetParallelForDirective &S,
4657                                         PrePostActionTy &Action) {
4658   Action.Enter(CGF);
4659   // Emit directive as a combined directive that consists of two implicit
4660   // directives: 'parallel' with 'for' directive.
4661   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4662     Action.Enter(CGF);
4663     CodeGenFunction::OMPCancelStackRAII CancelRegion(
4664         CGF, OMPD_target_parallel_for, S.hasCancel());
4665     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4666                                emitDispatchForLoopBounds);
4667   };
4668   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4669                                  emitEmptyBoundParameters);
4670 }
4671 
4672 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4673     CodeGenModule &CGM, StringRef ParentName,
4674     const OMPTargetParallelForDirective &S) {
4675   // Emit SPMD target parallel for region as a standalone region.
4676   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4677     emitTargetParallelForRegion(CGF, S, Action);
4678   };
4679   llvm::Function *Fn;
4680   llvm::Constant *Addr;
4681   // Emit target region as a standalone region.
4682   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4683       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4684   assert(Fn && Addr && "Target device function emission failed.");
4685 }
4686 
4687 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4688     const OMPTargetParallelForDirective &S) {
4689   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4690     emitTargetParallelForRegion(CGF, S, Action);
4691   };
4692   emitCommonOMPTargetDirective(*this, S, CodeGen);
4693 }
4694 
4695 static void
4696 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4697                                 const OMPTargetParallelForSimdDirective &S,
4698                                 PrePostActionTy &Action) {
4699   Action.Enter(CGF);
4700   // Emit directive as a combined directive that consists of two implicit
4701   // directives: 'parallel' with 'for' directive.
4702   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4703     Action.Enter(CGF);
4704     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4705                                emitDispatchForLoopBounds);
4706   };
4707   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4708                                  emitEmptyBoundParameters);
4709 }
4710 
4711 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4712     CodeGenModule &CGM, StringRef ParentName,
4713     const OMPTargetParallelForSimdDirective &S) {
4714   // Emit SPMD target parallel for region as a standalone region.
4715   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4716     emitTargetParallelForSimdRegion(CGF, S, Action);
4717   };
4718   llvm::Function *Fn;
4719   llvm::Constant *Addr;
4720   // Emit target region as a standalone region.
4721   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4722       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4723   assert(Fn && Addr && "Target device function emission failed.");
4724 }
4725 
4726 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4727     const OMPTargetParallelForSimdDirective &S) {
4728   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4729     emitTargetParallelForSimdRegion(CGF, S, Action);
4730   };
4731   emitCommonOMPTargetDirective(*this, S, CodeGen);
4732 }
4733 
4734 /// Emit a helper variable and return corresponding lvalue.
4735 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4736                      const ImplicitParamDecl *PVD,
4737                      CodeGenFunction::OMPPrivateScope &Privates) {
4738   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4739   Privates.addPrivate(
4740       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4741 }
4742 
4743 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4744   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4745   // Emit outlined function for task construct.
4746   const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
4747   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4748   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4749   const Expr *IfCond = nullptr;
4750   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4751     if (C->getNameModifier() == OMPD_unknown ||
4752         C->getNameModifier() == OMPD_taskloop) {
4753       IfCond = C->getCondition();
4754       break;
4755     }
4756   }
4757 
4758   OMPTaskDataTy Data;
4759   // Check if taskloop must be emitted without taskgroup.
4760   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4761   // TODO: Check if we should emit tied or untied task.
4762   Data.Tied = true;
4763   // Set scheduling for taskloop
4764   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4765     // grainsize clause
4766     Data.Schedule.setInt(/*IntVal=*/false);
4767     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4768   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4769     // num_tasks clause
4770     Data.Schedule.setInt(/*IntVal=*/true);
4771     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4772   }
4773 
4774   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4775     // if (PreCond) {
4776     //   for (IV in 0..LastIteration) BODY;
4777     //   <Final counter/linear vars updates>;
4778     // }
4779     //
4780 
4781     // Emit: if (PreCond) - begin.
4782     // If the condition constant folds and can be elided, avoid emitting the
4783     // whole loop.
4784     bool CondConstant;
4785     llvm::BasicBlock *ContBlock = nullptr;
4786     OMPLoopScope PreInitScope(CGF, S);
4787     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4788       if (!CondConstant)
4789         return;
4790     } else {
4791       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4792       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4793       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4794                   CGF.getProfileCount(&S));
4795       CGF.EmitBlock(ThenBlock);
4796       CGF.incrementProfileCounter(&S);
4797     }
4798 
4799     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4800       CGF.EmitOMPSimdInit(S);
4801 
4802     OMPPrivateScope LoopScope(CGF);
4803     // Emit helper vars inits.
4804     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4805     auto *I = CS->getCapturedDecl()->param_begin();
4806     auto *LBP = std::next(I, LowerBound);
4807     auto *UBP = std::next(I, UpperBound);
4808     auto *STP = std::next(I, Stride);
4809     auto *LIP = std::next(I, LastIter);
4810     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4811              LoopScope);
4812     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4813              LoopScope);
4814     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4815     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4816              LoopScope);
4817     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4818     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4819     (void)LoopScope.Privatize();
4820     // Emit the loop iteration variable.
4821     const Expr *IVExpr = S.getIterationVariable();
4822     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4823     CGF.EmitVarDecl(*IVDecl);
4824     CGF.EmitIgnoredExpr(S.getInit());
4825 
4826     // Emit the iterations count variable.
4827     // If it is not a variable, Sema decided to calculate iterations count on
4828     // each iteration (e.g., it is foldable into a constant).
4829     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4830       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4831       // Emit calculation of the iterations count.
4832       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4833     }
4834 
4835     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4836                          S.getInc(),
4837                          [&S](CodeGenFunction &CGF) {
4838                            CGF.EmitOMPLoopBody(S, JumpDest());
4839                            CGF.EmitStopPoint(&S);
4840                          },
4841                          [](CodeGenFunction &) {});
4842     // Emit: if (PreCond) - end.
4843     if (ContBlock) {
4844       CGF.EmitBranch(ContBlock);
4845       CGF.EmitBlock(ContBlock, true);
4846     }
4847     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4848     if (HasLastprivateClause) {
4849       CGF.EmitOMPLastprivateClauseFinal(
4850           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4851           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4852               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4853               (*LIP)->getType(), S.getLocStart())));
4854     }
4855   };
4856   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4857                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4858                             const OMPTaskDataTy &Data) {
4859     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4860       OMPLoopScope PreInitScope(CGF, S);
4861       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4862                                                   OutlinedFn, SharedsTy,
4863                                                   CapturedStruct, IfCond, Data);
4864     };
4865     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4866                                                     CodeGen);
4867   };
4868   if (Data.Nogroup) {
4869     EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
4870   } else {
4871     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4872         *this,
4873         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4874                                         PrePostActionTy &Action) {
4875           Action.Enter(CGF);
4876           CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
4877                                         Data);
4878         },
4879         S.getLocStart());
4880   }
4881 }
4882 
4883 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4884   EmitOMPTaskLoopBasedDirective(S);
4885 }
4886 
4887 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4888     const OMPTaskLoopSimdDirective &S) {
4889   EmitOMPTaskLoopBasedDirective(S);
4890 }
4891 
4892 // Generate the instructions for '#pragma omp target update' directive.
4893 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4894     const OMPTargetUpdateDirective &S) {
4895   // If we don't have target devices, don't bother emitting the data mapping
4896   // code.
4897   if (CGM.getLangOpts().OMPTargetTriples.empty())
4898     return;
4899 
4900   // Check if we have any if clause associated with the directive.
4901   const Expr *IfCond = nullptr;
4902   if (auto *C = S.getSingleClause<OMPIfClause>())
4903     IfCond = C->getCondition();
4904 
4905   // Check if we have any device clause associated with the directive.
4906   const Expr *Device = nullptr;
4907   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4908     Device = C->getDevice();
4909 
4910   OMPLexicalScope Scope(*this, S, OMPD_task);
4911   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4912 }
4913 
4914 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
4915     const OMPExecutableDirective &D) {
4916   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
4917     return;
4918   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
4919     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
4920       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
4921     } else {
4922       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
4923         for (const auto *E : LD->counters()) {
4924           if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
4925                   cast<DeclRefExpr>(E)->getDecl())) {
4926             // Emit only those that were not explicitly referenced in clauses.
4927             if (!CGF.LocalDeclMap.count(VD))
4928               CGF.EmitVarDecl(*VD);
4929           }
4930         }
4931       }
4932       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
4933     }
4934   };
4935   OMPSimdLexicalScope Scope(*this, D);
4936   CGM.getOpenMPRuntime().emitInlinedDirective(
4937       *this,
4938       isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
4939                                                   : D.getDirectiveKind(),
4940       CodeGen);
4941 }
4942