1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(
57       CodeGenFunction &CGF, const OMPExecutableDirective &S,
58       const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
59       const bool EmitPreInitStmt = true)
60       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
61         InlinedShareds(CGF) {
62     if (EmitPreInitStmt)
63       emitPreInitStmt(CGF, S);
64     if (!CapturedRegion.hasValue())
65       return;
66     assert(S.hasAssociatedStmt() &&
67            "Expected associated statement for inlined directive.");
68     const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
69     for (auto &C : CS->captures()) {
70       if (C.capturesVariable() || C.capturesVariableByCopy()) {
71         auto *VD = C.getCapturedVar();
72         assert(VD == VD->getCanonicalDecl() &&
73                "Canonical decl must be captured.");
74         DeclRefExpr DRE(
75             const_cast<VarDecl *>(VD),
76             isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
77                                        InlinedShareds.isGlobalVarCaptured(VD)),
78             VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
79         InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
80           return CGF.EmitLValue(&DRE).getAddress();
81         });
82       }
83     }
84     (void)InlinedShareds.Privatize();
85   }
86 };
87 
88 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
89 /// for captured expressions.
90 class OMPParallelScope final : public OMPLexicalScope {
91   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
92     OpenMPDirectiveKind Kind = S.getDirectiveKind();
93     return !(isOpenMPTargetExecutionDirective(Kind) ||
94              isOpenMPLoopBoundSharingDirective(Kind)) &&
95            isOpenMPParallelDirective(Kind);
96   }
97 
98 public:
99   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
100       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
101                         EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
116                         EmitPreInitStmt(S)) {}
117 };
118 
119 /// Private scope for OpenMP loop-based directives, that supports capturing
120 /// of used expression from loop statement.
121 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
122   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
123     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
124     for (auto *E : S.counters()) {
125       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
126       (void)PreCondScope.addPrivate(VD, [&CGF, VD]() {
127         return CGF.CreateMemTemp(VD->getType().getNonReferenceType());
128       });
129     }
130     (void)PreCondScope.Privatize();
131     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
132       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
133         for (const auto *I : PreInits->decls())
134           CGF.EmitVarDecl(cast<VarDecl>(*I));
135       }
136     }
137   }
138 
139 public:
140   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
141       : CodeGenFunction::RunCleanupsScope(CGF) {
142     emitPreInitStmt(CGF, S);
143   }
144 };
145 
146 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
147   CodeGenFunction::OMPPrivateScope InlinedShareds;
148 
149   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
150     return CGF.LambdaCaptureFields.lookup(VD) ||
151            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
152            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
153             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
154   }
155 
156 public:
157   OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
158       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
159         InlinedShareds(CGF) {
160     for (const auto *C : S.clauses()) {
161       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
162         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
163           for (const auto *I : PreInit->decls()) {
164             if (!I->hasAttr<OMPCaptureNoInitAttr>())
165               CGF.EmitVarDecl(cast<VarDecl>(*I));
166             else {
167               CodeGenFunction::AutoVarEmission Emission =
168                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
169               CGF.EmitAutoVarCleanups(Emission);
170             }
171           }
172         }
173       } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
174         for (const Expr *E : UDP->varlists()) {
175           const Decl *D = cast<DeclRefExpr>(E)->getDecl();
176           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
177             CGF.EmitVarDecl(*OED);
178         }
179       }
180     }
181     if (!isOpenMPSimdDirective(S.getDirectiveKind()))
182       CGF.EmitOMPPrivateClause(S, InlinedShareds);
183     if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
184       if (const Expr *E = TG->getReductionRef())
185         CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
186     }
187     const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
188     while (CS) {
189       for (auto &C : CS->captures()) {
190         if (C.capturesVariable() || C.capturesVariableByCopy()) {
191           auto *VD = C.getCapturedVar();
192           assert(VD == VD->getCanonicalDecl() &&
193                  "Canonical decl must be captured.");
194           DeclRefExpr DRE(const_cast<VarDecl *>(VD),
195                           isCapturedVar(CGF, VD) ||
196                               (CGF.CapturedStmtInfo &&
197                                InlinedShareds.isGlobalVarCaptured(VD)),
198                           VD->getType().getNonReferenceType(), VK_LValue,
199                           C.getLocation());
200           InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
201             return CGF.EmitLValue(&DRE).getAddress();
202           });
203         }
204       }
205       CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
206     }
207     (void)InlinedShareds.Privatize();
208   }
209 };
210 
211 } // namespace
212 
213 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
214                                          const OMPExecutableDirective &S,
215                                          const RegionCodeGenTy &CodeGen);
216 
217 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
218   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
219     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
220       OrigVD = OrigVD->getCanonicalDecl();
221       bool IsCaptured =
222           LambdaCaptureFields.lookup(OrigVD) ||
223           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
224           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
225       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
226                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
227       return EmitLValue(&DRE);
228     }
229   }
230   return EmitLValue(E);
231 }
232 
233 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
234   auto &C = getContext();
235   llvm::Value *Size = nullptr;
236   auto SizeInChars = C.getTypeSizeInChars(Ty);
237   if (SizeInChars.isZero()) {
238     // getTypeSizeInChars() returns 0 for a VLA.
239     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
240       llvm::Value *ArraySize;
241       std::tie(ArraySize, Ty) = getVLASize(VAT);
242       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
243     }
244     SizeInChars = C.getTypeSizeInChars(Ty);
245     if (SizeInChars.isZero())
246       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
247     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
248   } else
249     Size = CGM.getSize(SizeInChars);
250   return Size;
251 }
252 
253 void CodeGenFunction::GenerateOpenMPCapturedVars(
254     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
255   const RecordDecl *RD = S.getCapturedRecordDecl();
256   auto CurField = RD->field_begin();
257   auto CurCap = S.captures().begin();
258   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
259                                                  E = S.capture_init_end();
260        I != E; ++I, ++CurField, ++CurCap) {
261     if (CurField->hasCapturedVLAType()) {
262       auto VAT = CurField->getCapturedVLAType();
263       auto *Val = VLASizeMap[VAT->getSizeExpr()];
264       CapturedVars.push_back(Val);
265     } else if (CurCap->capturesThis())
266       CapturedVars.push_back(CXXThisValue);
267     else if (CurCap->capturesVariableByCopy()) {
268       llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
269 
270       // If the field is not a pointer, we need to save the actual value
271       // and load it as a void pointer.
272       if (!CurField->getType()->isAnyPointerType()) {
273         auto &Ctx = getContext();
274         auto DstAddr = CreateMemTemp(
275             Ctx.getUIntPtrType(),
276             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
277         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
278 
279         auto *SrcAddrVal = EmitScalarConversion(
280             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
281             Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
282         LValue SrcLV =
283             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
284 
285         // Store the value using the source type pointer.
286         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
287 
288         // Load the value using the destination type pointer.
289         CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
290       }
291       CapturedVars.push_back(CV);
292     } else {
293       assert(CurCap->capturesVariable() && "Expected capture by reference.");
294       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
295     }
296   }
297 }
298 
299 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
300                                     QualType DstType, StringRef Name,
301                                     LValue AddrLV,
302                                     bool isReferenceType = false) {
303   ASTContext &Ctx = CGF.getContext();
304 
305   auto *CastedPtr = CGF.EmitScalarConversion(AddrLV.getAddress().getPointer(),
306                                              Ctx.getUIntPtrType(),
307                                              Ctx.getPointerType(DstType), Loc);
308   auto TmpAddr =
309       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
310           .getAddress();
311 
312   // If we are dealing with references we need to return the address of the
313   // reference instead of the reference of the value.
314   if (isReferenceType) {
315     QualType RefType = Ctx.getLValueReferenceType(DstType);
316     auto *RefVal = TmpAddr.getPointer();
317     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
318     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
319     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
320   }
321 
322   return TmpAddr;
323 }
324 
325 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
326   if (T->isLValueReferenceType()) {
327     return C.getLValueReferenceType(
328         getCanonicalParamType(C, T.getNonReferenceType()),
329         /*SpelledAsLValue=*/false);
330   }
331   if (T->isPointerType())
332     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
333   if (auto *A = T->getAsArrayTypeUnsafe()) {
334     if (auto *VLA = dyn_cast<VariableArrayType>(A))
335       return getCanonicalParamType(C, VLA->getElementType());
336     else if (!A->isVariablyModifiedType())
337       return C.getCanonicalType(T);
338   }
339   return C.getCanonicalParamType(T);
340 }
341 
342 namespace {
343   /// Contains required data for proper outlined function codegen.
344   struct FunctionOptions {
345     /// Captured statement for which the function is generated.
346     const CapturedStmt *S = nullptr;
347     /// true if cast to/from  UIntPtr is required for variables captured by
348     /// value.
349     const bool UIntPtrCastRequired = true;
350     /// true if only casted arguments must be registered as local args or VLA
351     /// sizes.
352     const bool RegisterCastedArgsOnly = false;
353     /// Name of the generated function.
354     const StringRef FunctionName;
355     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
356                              bool RegisterCastedArgsOnly,
357                              StringRef FunctionName)
358         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
359           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
360           FunctionName(FunctionName) {}
361   };
362 }
363 
364 static llvm::Function *emitOutlinedFunctionPrologue(
365     CodeGenFunction &CGF, FunctionArgList &Args,
366     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
367         &LocalAddrs,
368     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
369         &VLASizes,
370     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
371   const CapturedDecl *CD = FO.S->getCapturedDecl();
372   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
373   assert(CD->hasBody() && "missing CapturedDecl body");
374 
375   CXXThisValue = nullptr;
376   // Build the argument list.
377   CodeGenModule &CGM = CGF.CGM;
378   ASTContext &Ctx = CGM.getContext();
379   FunctionArgList TargetArgs;
380   Args.append(CD->param_begin(),
381               std::next(CD->param_begin(), CD->getContextParamPosition()));
382   TargetArgs.append(
383       CD->param_begin(),
384       std::next(CD->param_begin(), CD->getContextParamPosition()));
385   auto I = FO.S->captures().begin();
386   FunctionDecl *DebugFunctionDecl = nullptr;
387   if (!FO.UIntPtrCastRequired) {
388     FunctionProtoType::ExtProtoInfo EPI;
389     DebugFunctionDecl = FunctionDecl::Create(
390         Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
391         SourceLocation(), DeclarationName(), Ctx.VoidTy,
392         Ctx.getTrivialTypeSourceInfo(
393             Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
394         SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
395   }
396   for (auto *FD : RD->fields()) {
397     QualType ArgType = FD->getType();
398     IdentifierInfo *II = nullptr;
399     VarDecl *CapVar = nullptr;
400 
401     // If this is a capture by copy and the type is not a pointer, the outlined
402     // function argument type should be uintptr and the value properly casted to
403     // uintptr. This is necessary given that the runtime library is only able to
404     // deal with pointers. We can pass in the same way the VLA type sizes to the
405     // outlined function.
406     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
407         I->capturesVariableArrayType()) {
408       if (FO.UIntPtrCastRequired)
409         ArgType = Ctx.getUIntPtrType();
410     }
411 
412     if (I->capturesVariable() || I->capturesVariableByCopy()) {
413       CapVar = I->getCapturedVar();
414       II = CapVar->getIdentifier();
415     } else if (I->capturesThis())
416       II = &Ctx.Idents.get("this");
417     else {
418       assert(I->capturesVariableArrayType());
419       II = &Ctx.Idents.get("vla");
420     }
421     if (ArgType->isVariablyModifiedType())
422       ArgType = getCanonicalParamType(Ctx, ArgType);
423     VarDecl *Arg;
424     if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
425       Arg = ParmVarDecl::Create(
426           Ctx, DebugFunctionDecl,
427           CapVar ? CapVar->getLocStart() : FD->getLocStart(),
428           CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
429           /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
430     } else {
431       Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
432                                       II, ArgType, ImplicitParamDecl::Other);
433     }
434     Args.emplace_back(Arg);
435     // Do not cast arguments if we emit function with non-original types.
436     TargetArgs.emplace_back(
437         FO.UIntPtrCastRequired
438             ? Arg
439             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
440     ++I;
441   }
442   Args.append(
443       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
444       CD->param_end());
445   TargetArgs.append(
446       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
447       CD->param_end());
448 
449   // Create the function declaration.
450   const CGFunctionInfo &FuncInfo =
451       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
452   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
453 
454   llvm::Function *F =
455       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
456                              FO.FunctionName, &CGM.getModule());
457   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
458   if (CD->isNothrow())
459     F->setDoesNotThrow();
460 
461   // Generate the function.
462   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
463                     FO.S->getLocStart(), CD->getBody()->getLocStart());
464   unsigned Cnt = CD->getContextParamPosition();
465   I = FO.S->captures().begin();
466   for (auto *FD : RD->fields()) {
467     // Do not map arguments if we emit function with non-original types.
468     Address LocalAddr(Address::invalid());
469     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
470       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
471                                                              TargetArgs[Cnt]);
472     } else {
473       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
474     }
475     // If we are capturing a pointer by copy we don't need to do anything, just
476     // use the value that we get from the arguments.
477     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
478       const VarDecl *CurVD = I->getCapturedVar();
479       // If the variable is a reference we need to materialize it here.
480       if (CurVD->getType()->isReferenceType()) {
481         Address RefAddr = CGF.CreateMemTemp(
482             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
483         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
484                               /*Volatile=*/false, CurVD->getType());
485         LocalAddr = RefAddr;
486       }
487       if (!FO.RegisterCastedArgsOnly)
488         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
489       ++Cnt;
490       ++I;
491       continue;
492     }
493 
494     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
495                                         AlignmentSource::Decl);
496     if (FD->hasCapturedVLAType()) {
497       if (FO.UIntPtrCastRequired) {
498         ArgLVal = CGF.MakeAddrLValue(
499             castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
500                                  Args[Cnt]->getName(), ArgLVal),
501             FD->getType(), AlignmentSource::Decl);
502       }
503       auto *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
504       auto VAT = FD->getCapturedVLAType();
505       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
506     } else if (I->capturesVariable()) {
507       auto *Var = I->getCapturedVar();
508       QualType VarTy = Var->getType();
509       Address ArgAddr = ArgLVal.getAddress();
510       if (!VarTy->isReferenceType()) {
511         if (ArgLVal.getType()->isLValueReferenceType()) {
512           ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
513         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
514           assert(ArgLVal.getType()->isPointerType());
515           ArgAddr = CGF.EmitLoadOfPointer(
516               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
517         }
518       }
519       if (!FO.RegisterCastedArgsOnly) {
520         LocalAddrs.insert(
521             {Args[Cnt],
522              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
523       }
524     } else if (I->capturesVariableByCopy()) {
525       assert(!FD->getType()->isAnyPointerType() &&
526              "Not expecting a captured pointer.");
527       auto *Var = I->getCapturedVar();
528       QualType VarTy = Var->getType();
529       LocalAddrs.insert(
530           {Args[Cnt],
531            {Var, FO.UIntPtrCastRequired
532                      ? castValueFromUintptr(CGF, I->getLocation(),
533                                             FD->getType(), Args[Cnt]->getName(),
534                                             ArgLVal, VarTy->isReferenceType())
535                      : ArgLVal.getAddress()}});
536     } else {
537       // If 'this' is captured, load it into CXXThisValue.
538       assert(I->capturesThis());
539       CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
540       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
541     }
542     ++Cnt;
543     ++I;
544   }
545 
546   return F;
547 }
548 
549 llvm::Function *
550 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
551   assert(
552       CapturedStmtInfo &&
553       "CapturedStmtInfo should be set when generating the captured function");
554   const CapturedDecl *CD = S.getCapturedDecl();
555   // Build the argument list.
556   bool NeedWrapperFunction =
557       getDebugInfo() &&
558       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
559   FunctionArgList Args;
560   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
561   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
562   SmallString<256> Buffer;
563   llvm::raw_svector_ostream Out(Buffer);
564   Out << CapturedStmtInfo->getHelperName();
565   if (NeedWrapperFunction)
566     Out << "_debug__";
567   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
568                      Out.str());
569   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
570                                                    VLASizes, CXXThisValue, FO);
571   for (const auto &LocalAddrPair : LocalAddrs) {
572     if (LocalAddrPair.second.first) {
573       setAddrOfLocalVar(LocalAddrPair.second.first,
574                         LocalAddrPair.second.second);
575     }
576   }
577   for (const auto &VLASizePair : VLASizes)
578     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
579   PGO.assignRegionCounters(GlobalDecl(CD), F);
580   CapturedStmtInfo->EmitBody(*this, CD->getBody());
581   FinishFunction(CD->getBodyRBrace());
582   if (!NeedWrapperFunction)
583     return F;
584 
585   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
586                             /*RegisterCastedArgsOnly=*/true,
587                             CapturedStmtInfo->getHelperName());
588   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
589   Args.clear();
590   LocalAddrs.clear();
591   VLASizes.clear();
592   llvm::Function *WrapperF =
593       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
594                                    WrapperCGF.CXXThisValue, WrapperFO);
595   llvm::SmallVector<llvm::Value *, 4> CallArgs;
596   for (const auto *Arg : Args) {
597     llvm::Value *CallArg;
598     auto I = LocalAddrs.find(Arg);
599     if (I != LocalAddrs.end()) {
600       LValue LV = WrapperCGF.MakeAddrLValue(
601           I->second.second,
602           I->second.first ? I->second.first->getType() : Arg->getType(),
603           AlignmentSource::Decl);
604       CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
605     } else {
606       auto EI = VLASizes.find(Arg);
607       if (EI != VLASizes.end())
608         CallArg = EI->second.second;
609       else {
610         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
611                                               Arg->getType(),
612                                               AlignmentSource::Decl);
613         CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
614       }
615     }
616     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
617   }
618   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
619                                                   F, CallArgs);
620   WrapperCGF.FinishFunction();
621   return WrapperF;
622 }
623 
624 //===----------------------------------------------------------------------===//
625 //                              OpenMP Directive Emission
626 //===----------------------------------------------------------------------===//
627 void CodeGenFunction::EmitOMPAggregateAssign(
628     Address DestAddr, Address SrcAddr, QualType OriginalType,
629     const llvm::function_ref<void(Address, Address)> &CopyGen) {
630   // Perform element-by-element initialization.
631   QualType ElementTy;
632 
633   // Drill down to the base element type on both arrays.
634   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
635   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
636   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
637 
638   auto SrcBegin = SrcAddr.getPointer();
639   auto DestBegin = DestAddr.getPointer();
640   // Cast from pointer to array type to pointer to single element.
641   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
642   // The basic structure here is a while-do loop.
643   auto BodyBB = createBasicBlock("omp.arraycpy.body");
644   auto DoneBB = createBasicBlock("omp.arraycpy.done");
645   auto IsEmpty =
646       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
647   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
648 
649   // Enter the loop body, making that address the current address.
650   auto EntryBB = Builder.GetInsertBlock();
651   EmitBlock(BodyBB);
652 
653   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
654 
655   llvm::PHINode *SrcElementPHI =
656     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
657   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
658   Address SrcElementCurrent =
659       Address(SrcElementPHI,
660               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
661 
662   llvm::PHINode *DestElementPHI =
663     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
664   DestElementPHI->addIncoming(DestBegin, EntryBB);
665   Address DestElementCurrent =
666     Address(DestElementPHI,
667             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
668 
669   // Emit copy.
670   CopyGen(DestElementCurrent, SrcElementCurrent);
671 
672   // Shift the address forward by one element.
673   auto DestElementNext = Builder.CreateConstGEP1_32(
674       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
675   auto SrcElementNext = Builder.CreateConstGEP1_32(
676       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
677   // Check whether we've reached the end.
678   auto Done =
679       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
680   Builder.CreateCondBr(Done, DoneBB, BodyBB);
681   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
682   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
683 
684   // Done.
685   EmitBlock(DoneBB, /*IsFinished=*/true);
686 }
687 
688 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
689                                   Address SrcAddr, const VarDecl *DestVD,
690                                   const VarDecl *SrcVD, const Expr *Copy) {
691   if (OriginalType->isArrayType()) {
692     auto *BO = dyn_cast<BinaryOperator>(Copy);
693     if (BO && BO->getOpcode() == BO_Assign) {
694       // Perform simple memcpy for simple copying.
695       LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
696       LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
697       EmitAggregateAssign(Dest, Src, OriginalType);
698     } else {
699       // For arrays with complex element types perform element by element
700       // copying.
701       EmitOMPAggregateAssign(
702           DestAddr, SrcAddr, OriginalType,
703           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
704             // Working with the single array element, so have to remap
705             // destination and source variables to corresponding array
706             // elements.
707             CodeGenFunction::OMPPrivateScope Remap(*this);
708             Remap.addPrivate(DestVD, [DestElement]() -> Address {
709               return DestElement;
710             });
711             Remap.addPrivate(
712                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
713             (void)Remap.Privatize();
714             EmitIgnoredExpr(Copy);
715           });
716     }
717   } else {
718     // Remap pseudo source variable to private copy.
719     CodeGenFunction::OMPPrivateScope Remap(*this);
720     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
721     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
722     (void)Remap.Privatize();
723     // Emit copying of the whole variable.
724     EmitIgnoredExpr(Copy);
725   }
726 }
727 
/// Emit the private copies for the variables listed in the 'firstprivate'
/// clauses of \p D and register them in \p PrivateScope.
///
/// Each original variable is handled at most once, keyed by its canonical
/// declaration. A variable may be skipped (no copy emitted here) when the
/// directive does not force a copy, the variable is not also lastprivate, and
/// it is captured through a field that is not of reference type.
///
/// \returns true if at least one variable for which a copy was emitted is also
/// listed in a 'lastprivate' clause.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  // Nothing can be emitted without an insertion point.
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Canonical declarations of every variable named in a 'lastprivate' clause.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    // Note: this loop variable shadows the directive parameter 'D'.
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  // Canonical declarations already processed (either emitted or skipped).
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // IRef and InitsRef are advanced in lock-step with private_copies().
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      // Field of the captured record for OrigVD, or null if it is not captured.
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // NOTE(review): a non-reference capture field presumably means the
      // variable was captured by copy, so the captured value already acts as
      // the private copy - confirm against the capture logic.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Emit the copy only the first time this variable is seen.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        // VD is the private copy; VDInit is the helper variable that the
        // initializer of VD refers to as its source.
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Build a reference to the original variable and take its address
        // before the variable is remapped to the private copy.
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal = EmitLValue(&DRE);
        Address OriginalAddr = OriginalLVal.getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(),
                                           Type);
              EmitAggregateAssign(Dest, OriginalLVal, Type);
            } else {
              // Non-trivial construction: initialize element by element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    // VDInit is bound to the source element only for the
                    // duration of this initializer and unbound afterwards.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
826 
827 void CodeGenFunction::EmitOMPPrivateClause(
828     const OMPExecutableDirective &D,
829     CodeGenFunction::OMPPrivateScope &PrivateScope) {
830   if (!HaveInsertPoint())
831     return;
832   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
833   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
834     auto IRef = C->varlist_begin();
835     for (auto IInit : C->private_copies()) {
836       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
837       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
838         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
839         bool IsRegistered =
840             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
841               // Emit private VarDecl with copy init.
842               EmitDecl(*VD);
843               return GetAddrOfLocalVar(VD);
844             });
845         assert(IsRegistered && "private var already registered as private");
846         // Silence the warning about unused variable.
847         (void)IsRegistered;
848       }
849       ++IRef;
850     }
851   }
852 }
853 
854 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
855   if (!HaveInsertPoint())
856     return false;
857   // threadprivate_var1 = master_threadprivate_var1;
858   // operator=(threadprivate_var2, master_threadprivate_var2);
859   // ...
860   // __kmpc_barrier(&loc, global_tid);
861   llvm::DenseSet<const VarDecl *> CopiedVars;
862   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
863   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
864     auto IRef = C->varlist_begin();
865     auto ISrcRef = C->source_exprs().begin();
866     auto IDestRef = C->destination_exprs().begin();
867     for (auto *AssignOp : C->assignment_ops()) {
868       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
869       QualType Type = VD->getType();
870       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
871         // Get the address of the master variable. If we are emitting code with
872         // TLS support, the address is passed from the master as field in the
873         // captured declaration.
874         Address MasterAddr = Address::invalid();
875         if (getLangOpts().OpenMPUseTLS &&
876             getContext().getTargetInfo().isTLSSupported()) {
877           assert(CapturedStmtInfo->lookup(VD) &&
878                  "Copyin threadprivates should have been captured!");
879           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
880                           VK_LValue, (*IRef)->getExprLoc());
881           MasterAddr = EmitLValue(&DRE).getAddress();
882           LocalDeclMap.erase(VD);
883         } else {
884           MasterAddr =
885             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
886                                         : CGM.GetAddrOfGlobal(VD),
887                     getContext().getDeclAlign(VD));
888         }
889         // Get the address of the threadprivate variable.
890         Address PrivateAddr = EmitLValue(*IRef).getAddress();
891         if (CopiedVars.size() == 1) {
892           // At first check if current thread is a master thread. If it is, no
893           // need to copy data.
894           CopyBegin = createBasicBlock("copyin.not.master");
895           CopyEnd = createBasicBlock("copyin.not.master.end");
896           Builder.CreateCondBr(
897               Builder.CreateICmpNE(
898                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
899                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
900               CopyBegin, CopyEnd);
901           EmitBlock(CopyBegin);
902         }
903         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
904         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
905         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
906       }
907       ++IRef;
908       ++ISrcRef;
909       ++IDestRef;
910     }
911   }
912   if (CopyEnd) {
913     // Exit out of copying procedure for non-master thread.
914     EmitBlock(CopyEnd, /*IsFinished=*/true);
915     return true;
916   }
917   return false;
918 }
919 
920 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
921     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
922   if (!HaveInsertPoint())
923     return false;
924   bool HasAtLeastOneLastprivate = false;
925   llvm::DenseSet<const VarDecl *> SIMDLCVs;
926   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
927     auto *LoopDirective = cast<OMPLoopDirective>(&D);
928     for (auto *C : LoopDirective->counters()) {
929       SIMDLCVs.insert(
930           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
931     }
932   }
933   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
934   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
935     HasAtLeastOneLastprivate = true;
936     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
937         !getLangOpts().OpenMPSimd)
938       break;
939     auto IRef = C->varlist_begin();
940     auto IDestRef = C->destination_exprs().begin();
941     for (auto *IInit : C->private_copies()) {
942       // Keep the address of the original variable for future update at the end
943       // of the loop.
944       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
945       // Taskloops do not require additional initialization, it is done in
946       // runtime support library.
947       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
948         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
949         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
950           DeclRefExpr DRE(
951               const_cast<VarDecl *>(OrigVD),
952               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
953                   OrigVD) != nullptr,
954               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
955           return EmitLValue(&DRE).getAddress();
956         });
957         // Check if the variable is also a firstprivate: in this case IInit is
958         // not generated. Initialization of this variable will happen in codegen
959         // for 'firstprivate' clause.
960         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
961           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
962           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
963             // Emit private VarDecl with copy init.
964             EmitDecl(*VD);
965             return GetAddrOfLocalVar(VD);
966           });
967           assert(IsRegistered &&
968                  "lastprivate var already registered as private");
969           (void)IsRegistered;
970         }
971       }
972       ++IRef;
973       ++IDestRef;
974     }
975   }
976   return HasAtLeastOneLastprivate;
977 }
978 
979 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
980     const OMPExecutableDirective &D, bool NoFinals,
981     llvm::Value *IsLastIterCond) {
982   if (!HaveInsertPoint())
983     return;
984   // Emit following code:
985   // if (<IsLastIterCond>) {
986   //   orig_var1 = private_orig_var1;
987   //   ...
988   //   orig_varn = private_orig_varn;
989   // }
990   llvm::BasicBlock *ThenBB = nullptr;
991   llvm::BasicBlock *DoneBB = nullptr;
992   if (IsLastIterCond) {
993     ThenBB = createBasicBlock(".omp.lastprivate.then");
994     DoneBB = createBasicBlock(".omp.lastprivate.done");
995     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
996     EmitBlock(ThenBB);
997   }
998   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
999   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1000   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1001     auto IC = LoopDirective->counters().begin();
1002     for (auto F : LoopDirective->finals()) {
1003       auto *D =
1004           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1005       if (NoFinals)
1006         AlreadyEmittedVars.insert(D);
1007       else
1008         LoopCountersAndUpdates[D] = F;
1009       ++IC;
1010     }
1011   }
1012   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1013     auto IRef = C->varlist_begin();
1014     auto ISrcRef = C->source_exprs().begin();
1015     auto IDestRef = C->destination_exprs().begin();
1016     for (auto *AssignOp : C->assignment_ops()) {
1017       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1018       QualType Type = PrivateVD->getType();
1019       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1020       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1021         // If lastprivate variable is a loop control variable for loop-based
1022         // directive, update its value before copyin back to original
1023         // variable.
1024         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1025           EmitIgnoredExpr(FinalExpr);
1026         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1027         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1028         // Get the address of the original variable.
1029         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1030         // Get the address of the private variable.
1031         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1032         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1033           PrivateAddr =
1034               Address(Builder.CreateLoad(PrivateAddr),
1035                       getNaturalTypeAlignment(RefTy->getPointeeType()));
1036         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1037       }
1038       ++IRef;
1039       ++ISrcRef;
1040       ++IDestRef;
1041     }
1042     if (auto *PostUpdate = C->getPostUpdateExpr())
1043       EmitIgnoredExpr(PostUpdate);
1044   }
1045   if (IsLastIterCond)
1046     EmitBlock(DoneBB, /*IsFinished=*/true);
1047 }
1048 
/// Emit the private reduction variables for the 'reduction' clauses of \p D
/// and register them, together with the LHS/RHS helper variables of each
/// reduction item, in \p PrivateScope.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Flatten the per-clause lists into parallel vectors indexed by reduction
  // item, across all reduction clauses of the directive.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  // Count is the index of the current reduction item inside RedCG.
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    // The callback emits the private variable's own initializer.
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    // Register the (possibly adjusted) private address as the replacement for
    // the base declaration of this reduction item.
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    // Bind the LHS helper to the shared (original) storage and the RHS helper
    // to the private copy; the exact form depends on the kind of item.
    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Array section with a variably modified private type: both helpers are
      // bound directly, without any element cast.
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Scalar array section or array subscript: the private address is cast
      // to the memory type of the RHS helper.
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      // Any other item. This 'Type' shadows the outer one and has the same
      // value.
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        // For arrays both helpers are cast to the helpers' own memory types.
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    // Move all parallel cursors to the next reduction item.
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1154 
1155 void CodeGenFunction::EmitOMPReductionClauseFinal(
1156     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1157   if (!HaveInsertPoint())
1158     return;
1159   llvm::SmallVector<const Expr *, 8> Privates;
1160   llvm::SmallVector<const Expr *, 8> LHSExprs;
1161   llvm::SmallVector<const Expr *, 8> RHSExprs;
1162   llvm::SmallVector<const Expr *, 8> ReductionOps;
1163   bool HasAtLeastOneReduction = false;
1164   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1165     HasAtLeastOneReduction = true;
1166     Privates.append(C->privates().begin(), C->privates().end());
1167     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1168     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1169     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1170   }
1171   if (HasAtLeastOneReduction) {
1172     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1173                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1174                       ReductionKind == OMPD_simd;
1175     bool SimpleReduction = ReductionKind == OMPD_simd;
1176     // Emit nowait reduction if nowait clause is present or directive is a
1177     // parallel directive (it always has implicit barrier).
1178     CGM.getOpenMPRuntime().emitReduction(
1179         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1180         {WithNowait, SimpleReduction, ReductionKind});
1181   }
1182 }
1183 
1184 static void emitPostUpdateForReductionClause(
1185     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1186     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1187   if (!CGF.HaveInsertPoint())
1188     return;
1189   llvm::BasicBlock *DoneBB = nullptr;
1190   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1191     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1192       if (!DoneBB) {
1193         if (auto *Cond = CondGen(CGF)) {
1194           // If the first post-update expression is found, emit conditional
1195           // block if it was requested.
1196           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1197           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1198           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1199           CGF.EmitBlock(ThenBB);
1200         }
1201       }
1202       CGF.EmitIgnoredExpr(PostUpdate);
1203     }
1204   }
1205   if (DoneBB)
1206     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1207 }
1208 
1209 namespace {
1210 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1211 /// parallel function. This is necessary for combined constructs such as
1212 /// 'distribute parallel for'
1213 typedef llvm::function_ref<void(CodeGenFunction &,
1214                                 const OMPExecutableDirective &,
1215                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1216     CodeGenBoundParametersTy;
1217 } // anonymous namespace
1218 
1219 static void emitCommonOMPParallelDirective(
1220     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1221     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1222     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1223   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1224   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1225       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1226   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1227     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1228     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1229                                          /*IgnoreResultAssign*/ true);
1230     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1231         CGF, NumThreads, NumThreadsClause->getLocStart());
1232   }
1233   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1234     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1235     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1236         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1237   }
1238   const Expr *IfCond = nullptr;
1239   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1240     if (C->getNameModifier() == OMPD_unknown ||
1241         C->getNameModifier() == OMPD_parallel) {
1242       IfCond = C->getCondition();
1243       break;
1244     }
1245   }
1246 
1247   OMPParallelScope Scope(CGF, S);
1248   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1249   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1250   // lower and upper bounds with the pragma 'for' chunking mechanism.
1251   // The following lambda takes care of appending the lower and upper bound
1252   // parameters when necessary
1253   CodeGenBoundParameters(CGF, S, CapturedVars);
1254   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1255   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1256                                               CapturedVars, IfCond);
1257 }
1258 
1259 static void emitEmptyBoundParameters(CodeGenFunction &,
1260                                      const OMPExecutableDirective &,
1261                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1262 
1263 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1264   // Emit parallel region as a standalone region.
1265   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1266     OMPPrivateScope PrivateScope(CGF);
1267     bool Copyins = CGF.EmitOMPCopyinClause(S);
1268     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1269     if (Copyins) {
1270       // Emit implicit barrier to synchronize threads and avoid data races on
1271       // propagation master's thread values of threadprivate variables to local
1272       // instances of that variables of all other implicit threads.
1273       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1274           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1275           /*ForceSimpleCall=*/true);
1276     }
1277     CGF.EmitOMPPrivateClause(S, PrivateScope);
1278     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1279     (void)PrivateScope.Privatize();
1280     CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1281     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1282   };
1283   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1284                                  emitEmptyBoundParameters);
1285   emitPostUpdateForReductionClause(
1286       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1287 }
1288 
1289 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1290                                       JumpDest LoopExit) {
1291   RunCleanupsScope BodyScope(*this);
1292   // Update counters values on current iteration.
1293   for (auto I : D.updates()) {
1294     EmitIgnoredExpr(I);
1295   }
1296   // Update the linear variables.
1297   // In distribute directives only loop counters may be marked as linear, no
1298   // need to generate the code for them.
1299   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1300     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1301       for (auto *U : C->updates())
1302         EmitIgnoredExpr(U);
1303     }
1304   }
1305 
1306   // On a continue in the body, jump to the end.
1307   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1308   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1309   // Emit loop body.
1310   EmitStmt(D.getBody());
1311   // The end (updates/cleanups).
1312   EmitBlock(Continue.getBlock());
1313   BreakContinueStack.pop_back();
1314 }
1315 
1316 void CodeGenFunction::EmitOMPInnerLoop(
1317     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1318     const Expr *IncExpr,
1319     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1320     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1321   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1322 
1323   // Start the loop with a block that tests the condition.
1324   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1325   EmitBlock(CondBlock);
1326   const SourceRange &R = S.getSourceRange();
1327   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1328                  SourceLocToDebugLoc(R.getEnd()));
1329 
1330   // If there are any cleanups between here and the loop-exit scope,
1331   // create a block to stage a loop exit along.
1332   auto ExitBlock = LoopExit.getBlock();
1333   if (RequiresCleanup)
1334     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1335 
1336   auto LoopBody = createBasicBlock("omp.inner.for.body");
1337 
1338   // Emit condition.
1339   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1340   if (ExitBlock != LoopExit.getBlock()) {
1341     EmitBlock(ExitBlock);
1342     EmitBranchThroughCleanup(LoopExit);
1343   }
1344 
1345   EmitBlock(LoopBody);
1346   incrementProfileCounter(&S);
1347 
1348   // Create a block for the increment.
1349   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1350   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1351 
1352   BodyGen(*this);
1353 
1354   // Emit "IV = IV + 1" and a back-edge to the condition block.
1355   EmitBlock(Continue.getBlock());
1356   EmitIgnoredExpr(IncExpr);
1357   PostIncGen(*this);
1358   BreakContinueStack.pop_back();
1359   EmitBranch(CondBlock);
1360   LoopStack.pop();
1361   // Emit the fall-through block.
1362   EmitBlock(LoopExit.getBlock());
1363 }
1364 
1365 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1366   if (!HaveInsertPoint())
1367     return false;
1368   // Emit inits for the linear variables.
1369   bool HasLinears = false;
1370   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1371     for (auto *Init : C->inits()) {
1372       HasLinears = true;
1373       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1374       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1375         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1376         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1377         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1378                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1379                         VD->getInit()->getType(), VK_LValue,
1380                         VD->getInit()->getExprLoc());
1381         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1382                                                 VD->getType()),
1383                        /*capturedByInit=*/false);
1384         EmitAutoVarCleanups(Emission);
1385       } else
1386         EmitVarDecl(*VD);
1387     }
1388     // Emit the linear steps for the linear clauses.
1389     // If a step is not constant, it is pre-calculated before the loop.
1390     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1391       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1392         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1393         // Emit calculation of the linear step.
1394         EmitIgnoredExpr(CS);
1395       }
1396   }
1397   return HasLinears;
1398 }
1399 
1400 void CodeGenFunction::EmitOMPLinearClauseFinal(
1401     const OMPLoopDirective &D,
1402     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1403   if (!HaveInsertPoint())
1404     return;
1405   llvm::BasicBlock *DoneBB = nullptr;
1406   // Emit the final values of the linear variables.
1407   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1408     auto IC = C->varlist_begin();
1409     for (auto *F : C->finals()) {
1410       if (!DoneBB) {
1411         if (auto *Cond = CondGen(*this)) {
1412           // If the first post-update expression is found, emit conditional
1413           // block if it was requested.
1414           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1415           DoneBB = createBasicBlock(".omp.linear.pu.done");
1416           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1417           EmitBlock(ThenBB);
1418         }
1419       }
1420       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1421       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1422                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1423                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1424       Address OrigAddr = EmitLValue(&DRE).getAddress();
1425       CodeGenFunction::OMPPrivateScope VarScope(*this);
1426       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1427       (void)VarScope.Privatize();
1428       EmitIgnoredExpr(F);
1429       ++IC;
1430     }
1431     if (auto *PostUpdate = C->getPostUpdateExpr())
1432       EmitIgnoredExpr(PostUpdate);
1433   }
1434   if (DoneBB)
1435     EmitBlock(DoneBB, /*IsFinished=*/true);
1436 }
1437 
1438 static void emitAlignedClause(CodeGenFunction &CGF,
1439                               const OMPExecutableDirective &D) {
1440   if (!CGF.HaveInsertPoint())
1441     return;
1442   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1443     unsigned ClauseAlignment = 0;
1444     if (auto AlignmentExpr = Clause->getAlignment()) {
1445       auto AlignmentCI =
1446           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1447       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1448     }
1449     for (auto E : Clause->varlists()) {
1450       unsigned Alignment = ClauseAlignment;
1451       if (Alignment == 0) {
1452         // OpenMP [2.8.1, Description]
1453         // If no optional parameter is specified, implementation-defined default
1454         // alignments for SIMD instructions on the target platforms are assumed.
1455         Alignment =
1456             CGF.getContext()
1457                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1458                     E->getType()->getPointeeType()))
1459                 .getQuantity();
1460       }
1461       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1462              "alignment is not power of 2");
1463       if (Alignment != 0) {
1464         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1465         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1466       }
1467     }
1468   }
1469 }
1470 
1471 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1472     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1473   if (!HaveInsertPoint())
1474     return;
1475   auto I = S.private_counters().begin();
1476   for (auto *E : S.counters()) {
1477     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1478     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1479     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1480       // Emit var without initialization.
1481       if (!LocalDeclMap.count(PrivateVD)) {
1482         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1483         EmitAutoVarCleanups(VarEmission);
1484       }
1485       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1486                       /*RefersToEnclosingVariableOrCapture=*/false,
1487                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1488       return EmitLValue(&DRE).getAddress();
1489     });
1490     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1491         VD->hasGlobalStorage()) {
1492       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1493         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1494                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1495                         E->getType(), VK_LValue, E->getExprLoc());
1496         return EmitLValue(&DRE).getAddress();
1497       });
1498     }
1499     ++I;
1500   }
1501 }
1502 
1503 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1504                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1505                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1506   if (!CGF.HaveInsertPoint())
1507     return;
1508   {
1509     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1510     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1511     (void)PreCondScope.Privatize();
1512     // Get initial values of real counters.
1513     for (auto I : S.inits()) {
1514       CGF.EmitIgnoredExpr(I);
1515     }
1516   }
1517   // Check that loop is executed at least one time.
1518   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1519 }
1520 
1521 void CodeGenFunction::EmitOMPLinearClause(
1522     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1523   if (!HaveInsertPoint())
1524     return;
1525   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1526   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1527     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1528     for (auto *C : LoopDirective->counters()) {
1529       SIMDLCVs.insert(
1530           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1531     }
1532   }
1533   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1534     auto CurPrivate = C->privates().begin();
1535     for (auto *E : C->varlists()) {
1536       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1537       auto *PrivateVD =
1538           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1539       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1540         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1541           // Emit private VarDecl with copy init.
1542           EmitVarDecl(*PrivateVD);
1543           return GetAddrOfLocalVar(PrivateVD);
1544         });
1545         assert(IsRegistered && "linear var already registered as private");
1546         // Silence the warning about unused variable.
1547         (void)IsRegistered;
1548       } else
1549         EmitVarDecl(*PrivateVD);
1550       ++CurPrivate;
1551     }
1552   }
1553 }
1554 
1555 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1556                                      const OMPExecutableDirective &D,
1557                                      bool IsMonotonic) {
1558   if (!CGF.HaveInsertPoint())
1559     return;
1560   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1561     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1562                                  /*ignoreResult=*/true);
1563     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1564     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1565     // In presence of finite 'safelen', it may be unsafe to mark all
1566     // the memory instructions parallel, because loop-carried
1567     // dependences of 'safelen' iterations are possible.
1568     if (!IsMonotonic)
1569       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1570   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1571     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1572                                  /*ignoreResult=*/true);
1573     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1574     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1575     // In presence of finite 'safelen', it may be unsafe to mark all
1576     // the memory instructions parallel, because loop-carried
1577     // dependences of 'safelen' iterations are possible.
1578     CGF.LoopStack.setParallel(false);
1579   }
1580 }
1581 
1582 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1583                                       bool IsMonotonic) {
1584   // Walk clauses and process safelen/lastprivate.
1585   LoopStack.setParallel(!IsMonotonic);
1586   LoopStack.setVectorizeEnable(true);
1587   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1588 }
1589 
1590 void CodeGenFunction::EmitOMPSimdFinal(
1591     const OMPLoopDirective &D,
1592     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1593   if (!HaveInsertPoint())
1594     return;
1595   llvm::BasicBlock *DoneBB = nullptr;
1596   auto IC = D.counters().begin();
1597   auto IPC = D.private_counters().begin();
1598   for (auto F : D.finals()) {
1599     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1600     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1601     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1602     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1603         OrigVD->hasGlobalStorage() || CED) {
1604       if (!DoneBB) {
1605         if (auto *Cond = CondGen(*this)) {
1606           // If the first post-update expression is found, emit conditional
1607           // block if it was requested.
1608           auto *ThenBB = createBasicBlock(".omp.final.then");
1609           DoneBB = createBasicBlock(".omp.final.done");
1610           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1611           EmitBlock(ThenBB);
1612         }
1613       }
1614       Address OrigAddr = Address::invalid();
1615       if (CED)
1616         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1617       else {
1618         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1619                         /*RefersToEnclosingVariableOrCapture=*/false,
1620                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1621         OrigAddr = EmitLValue(&DRE).getAddress();
1622       }
1623       OMPPrivateScope VarScope(*this);
1624       VarScope.addPrivate(OrigVD,
1625                           [OrigAddr]() -> Address { return OrigAddr; });
1626       (void)VarScope.Privatize();
1627       EmitIgnoredExpr(F);
1628     }
1629     ++IC;
1630     ++IPC;
1631   }
1632   if (DoneBB)
1633     EmitBlock(DoneBB, /*IsFinished=*/true);
1634 }
1635 
1636 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1637                                          const OMPLoopDirective &S,
1638                                          CodeGenFunction::JumpDest LoopExit) {
1639   CGF.EmitOMPLoopBody(S, LoopExit);
1640   CGF.EmitStopPoint(&S);
1641 }
1642 
1643 /// Emit a helper variable and return corresponding lvalue.
1644 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1645                                const DeclRefExpr *Helper) {
1646   auto VDecl = cast<VarDecl>(Helper->getDecl());
1647   CGF.EmitVarDecl(*VDecl);
1648   return CGF.EmitLValue(Helper);
1649 }
1650 
1651 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
1652                               PrePostActionTy &Action) {
1653   Action.Enter(CGF);
1654   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
1655          "Expected simd directive");
1656   OMPLoopScope PreInitScope(CGF, S);
1657   // if (PreCond) {
1658   //   for (IV in 0..LastIteration) BODY;
1659   //   <Final counter/linear vars updates>;
1660   // }
1661   //
1662   if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
1663       isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
1664       isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
1665     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
1666     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
1667   }
1668 
1669   // Emit: if (PreCond) - begin.
1670   // If the condition constant folds and can be elided, avoid emitting the
1671   // whole loop.
1672   bool CondConstant;
1673   llvm::BasicBlock *ContBlock = nullptr;
1674   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1675     if (!CondConstant)
1676       return;
1677   } else {
1678     auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1679     ContBlock = CGF.createBasicBlock("simd.if.end");
1680     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1681                 CGF.getProfileCount(&S));
1682     CGF.EmitBlock(ThenBlock);
1683     CGF.incrementProfileCounter(&S);
1684   }
1685 
1686   // Emit the loop iteration variable.
1687   const Expr *IVExpr = S.getIterationVariable();
1688   const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1689   CGF.EmitVarDecl(*IVDecl);
1690   CGF.EmitIgnoredExpr(S.getInit());
1691 
1692   // Emit the iterations count variable.
1693   // If it is not a variable, Sema decided to calculate iterations count on
1694   // each iteration (e.g., it is foldable into a constant).
1695   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1696     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1697     // Emit calculation of the iterations count.
1698     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1699   }
1700 
1701   CGF.EmitOMPSimdInit(S);
1702 
1703   emitAlignedClause(CGF, S);
1704   (void)CGF.EmitOMPLinearClauseInit(S);
1705   {
1706     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
1707     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1708     CGF.EmitOMPLinearClause(S, LoopScope);
1709     CGF.EmitOMPPrivateClause(S, LoopScope);
1710     CGF.EmitOMPReductionClauseInit(S, LoopScope);
1711     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1712     (void)LoopScope.Privatize();
1713     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1714                          S.getInc(),
1715                          [&S](CodeGenFunction &CGF) {
1716                            CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
1717                            CGF.EmitStopPoint(&S);
1718                          },
1719                          [](CodeGenFunction &) {});
1720     CGF.EmitOMPSimdFinal(
1721         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1722     // Emit final copy of the lastprivate variables at the end of loops.
1723     if (HasLastprivateClause)
1724       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1725     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1726     emitPostUpdateForReductionClause(
1727         CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1728   }
1729   CGF.EmitOMPLinearClauseFinal(
1730       S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1731   // Emit: if (PreCond) - end.
1732   if (ContBlock) {
1733     CGF.EmitBranch(ContBlock);
1734     CGF.EmitBlock(ContBlock, true);
1735   }
1736 }
1737 
1738 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1739   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1740     emitOMPSimdRegion(CGF, S, Action);
1741   };
1742   OMPLexicalScope Scope(*this, S, OMPD_unknown);
1743   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1744 }
1745 
1746 void CodeGenFunction::EmitOMPOuterLoop(
1747     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1748     CodeGenFunction::OMPPrivateScope &LoopScope,
1749     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1750     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1751     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1752   auto &RT = CGM.getOpenMPRuntime();
1753 
1754   const Expr *IVExpr = S.getIterationVariable();
1755   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1756   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1757 
1758   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1759 
1760   // Start the loop with a block that tests the condition.
1761   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1762   EmitBlock(CondBlock);
1763   const SourceRange &R = S.getSourceRange();
1764   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1765                  SourceLocToDebugLoc(R.getEnd()));
1766 
1767   llvm::Value *BoolCondVal = nullptr;
1768   if (!DynamicOrOrdered) {
1769     // UB = min(UB, GlobalUB) or
1770     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1771     // 'distribute parallel for')
1772     EmitIgnoredExpr(LoopArgs.EUB);
1773     // IV = LB
1774     EmitIgnoredExpr(LoopArgs.Init);
1775     // IV < UB
1776     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1777   } else {
1778     BoolCondVal =
1779         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1780                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1781   }
1782 
1783   // If there are any cleanups between here and the loop-exit scope,
1784   // create a block to stage a loop exit along.
1785   auto ExitBlock = LoopExit.getBlock();
1786   if (LoopScope.requiresCleanups())
1787     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1788 
1789   auto LoopBody = createBasicBlock("omp.dispatch.body");
1790   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1791   if (ExitBlock != LoopExit.getBlock()) {
1792     EmitBlock(ExitBlock);
1793     EmitBranchThroughCleanup(LoopExit);
1794   }
1795   EmitBlock(LoopBody);
1796 
1797   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1798   // LB for loop condition and emitted it above).
1799   if (DynamicOrOrdered)
1800     EmitIgnoredExpr(LoopArgs.Init);
1801 
1802   // Create a block for the increment.
1803   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1804   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1805 
1806   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1807   // with dynamic/guided scheduling and without ordered clause.
1808   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1809     LoopStack.setParallel(!IsMonotonic);
1810   else
1811     EmitOMPSimdInit(S, IsMonotonic);
1812 
1813   SourceLocation Loc = S.getLocStart();
1814 
1815   // when 'distribute' is not combined with a 'for':
1816   // while (idx <= UB) { BODY; ++idx; }
1817   // when 'distribute' is combined with a 'for'
1818   // (e.g. 'distribute parallel for')
1819   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1820   EmitOMPInnerLoop(
1821       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1822       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1823         CodeGenLoop(CGF, S, LoopExit);
1824       },
1825       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1826         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1827       });
1828 
1829   EmitBlock(Continue.getBlock());
1830   BreakContinueStack.pop_back();
1831   if (!DynamicOrOrdered) {
1832     // Emit "LB = LB + Stride", "UB = UB + Stride".
1833     EmitIgnoredExpr(LoopArgs.NextLB);
1834     EmitIgnoredExpr(LoopArgs.NextUB);
1835   }
1836 
1837   EmitBranch(CondBlock);
1838   LoopStack.pop();
1839   // Emit the fall-through block.
1840   EmitBlock(LoopExit.getBlock());
1841 
1842   // Tell the runtime we are done.
1843   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1844     if (!DynamicOrOrdered)
1845       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1846                                                      S.getDirectiveKind());
1847   };
1848   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1849 }
1850 
1851 void CodeGenFunction::EmitOMPForOuterLoop(
1852     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1853     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1854     const OMPLoopArguments &LoopArgs,
1855     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1856   auto &RT = CGM.getOpenMPRuntime();
1857 
1858   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1859   const bool DynamicOrOrdered =
1860       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1861 
1862   assert((Ordered ||
1863           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1864                                  LoopArgs.Chunk != nullptr)) &&
1865          "static non-chunked schedule does not need outer loop");
1866 
1867   // Emit outer loop.
1868   //
1869   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1870   // When schedule(dynamic,chunk_size) is specified, the iterations are
1871   // distributed to threads in the team in chunks as the threads request them.
1872   // Each thread executes a chunk of iterations, then requests another chunk,
1873   // until no chunks remain to be distributed. Each chunk contains chunk_size
1874   // iterations, except for the last chunk to be distributed, which may have
1875   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1876   //
1877   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1878   // to threads in the team in chunks as the executing threads request them.
1879   // Each thread executes a chunk of iterations, then requests another chunk,
1880   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1881   // each chunk is proportional to the number of unassigned iterations divided
1882   // by the number of threads in the team, decreasing to 1. For a chunk_size
1883   // with value k (greater than 1), the size of each chunk is determined in the
1884   // same way, with the restriction that the chunks do not contain fewer than k
1885   // iterations (except for the last chunk to be assigned, which may have fewer
1886   // than k iterations).
1887   //
1888   // When schedule(auto) is specified, the decision regarding scheduling is
1889   // delegated to the compiler and/or runtime system. The programmer gives the
1890   // implementation the freedom to choose any possible mapping of iterations to
1891   // threads in the team.
1892   //
1893   // When schedule(runtime) is specified, the decision regarding scheduling is
1894   // deferred until run time, and the schedule and chunk size are taken from the
1895   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1896   // implementation defined
1897   //
1898   // while(__kmpc_dispatch_next(&LB, &UB)) {
1899   //   idx = LB;
1900   //   while (idx <= UB) { BODY; ++idx;
1901   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1902   //   } // inner loop
1903   // }
1904   //
1905   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1906   // When schedule(static, chunk_size) is specified, iterations are divided into
1907   // chunks of size chunk_size, and the chunks are assigned to the threads in
1908   // the team in a round-robin fashion in the order of the thread number.
1909   //
1910   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1911   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1912   //   LB = LB + ST;
1913   //   UB = UB + ST;
1914   // }
1915   //
1916 
1917   const Expr *IVExpr = S.getIterationVariable();
1918   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1919   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1920 
1921   if (DynamicOrOrdered) {
1922     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1923     llvm::Value *LBVal = DispatchBounds.first;
1924     llvm::Value *UBVal = DispatchBounds.second;
1925     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1926                                                              LoopArgs.Chunk};
1927     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1928                            IVSigned, Ordered, DipatchRTInputValues);
1929   } else {
1930     CGOpenMPRuntime::StaticRTInput StaticInit(
1931         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1932         LoopArgs.ST, LoopArgs.Chunk);
1933     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1934                          ScheduleKind, StaticInit);
1935   }
1936 
1937   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1938                                     const unsigned IVSize,
1939                                     const bool IVSigned) {
1940     if (Ordered) {
1941       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1942                                                             IVSigned);
1943     }
1944   };
1945 
1946   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1947                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1948   OuterLoopArgs.IncExpr = S.getInc();
1949   OuterLoopArgs.Init = S.getInit();
1950   OuterLoopArgs.Cond = S.getCond();
1951   OuterLoopArgs.NextLB = S.getNextLowerBound();
1952   OuterLoopArgs.NextUB = S.getNextUpperBound();
1953   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1954                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1955 }
1956 
1957 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1958                              const unsigned IVSize, const bool IVSigned) {}
1959 
1960 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1961     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1962     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1963     const CodeGenLoopTy &CodeGenLoopContent) {
1964 
1965   auto &RT = CGM.getOpenMPRuntime();
1966 
1967   // Emit outer loop.
1968   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1969   // dynamic
1970   //
1971 
1972   const Expr *IVExpr = S.getIterationVariable();
1973   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1974   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1975 
1976   CGOpenMPRuntime::StaticRTInput StaticInit(
1977       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1978       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1979   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1980 
1981   // for combined 'distribute' and 'for' the increment expression of distribute
1982   // is store in DistInc. For 'distribute' alone, it is in Inc.
1983   Expr *IncExpr;
1984   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1985     IncExpr = S.getDistInc();
1986   else
1987     IncExpr = S.getInc();
1988 
1989   // this routine is shared by 'omp distribute parallel for' and
1990   // 'omp distribute': select the right EUB expression depending on the
1991   // directive
1992   OMPLoopArguments OuterLoopArgs;
1993   OuterLoopArgs.LB = LoopArgs.LB;
1994   OuterLoopArgs.UB = LoopArgs.UB;
1995   OuterLoopArgs.ST = LoopArgs.ST;
1996   OuterLoopArgs.IL = LoopArgs.IL;
1997   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1998   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1999                           ? S.getCombinedEnsureUpperBound()
2000                           : S.getEnsureUpperBound();
2001   OuterLoopArgs.IncExpr = IncExpr;
2002   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2003                            ? S.getCombinedInit()
2004                            : S.getInit();
2005   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2006                            ? S.getCombinedCond()
2007                            : S.getCond();
2008   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2009                              ? S.getCombinedNextLowerBound()
2010                              : S.getNextLowerBound();
2011   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2012                              ? S.getCombinedNextUpperBound()
2013                              : S.getNextUpperBound();
2014 
2015   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2016                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2017                    emitEmptyOrdered);
2018 }
2019 
2020 static std::pair<LValue, LValue>
2021 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2022                                      const OMPExecutableDirective &S) {
2023   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2024   LValue LB =
2025       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2026   LValue UB =
2027       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2028 
2029   // When composing 'distribute' with 'for' (e.g. as in 'distribute
2030   // parallel for') we need to use the 'distribute'
2031   // chunk lower and upper bounds rather than the whole loop iteration
2032   // space. These are parameters to the outlined function for 'parallel'
2033   // and we copy the bounds of the previous schedule into the
2034   // the current ones.
2035   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2036   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2037   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2038       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2039   PrevLBVal = CGF.EmitScalarConversion(
2040       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2041       LS.getIterationVariable()->getType(),
2042       LS.getPrevLowerBoundVariable()->getExprLoc());
2043   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2044       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2045   PrevUBVal = CGF.EmitScalarConversion(
2046       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2047       LS.getIterationVariable()->getType(),
2048       LS.getPrevUpperBoundVariable()->getExprLoc());
2049 
2050   CGF.EmitStoreOfScalar(PrevLBVal, LB);
2051   CGF.EmitStoreOfScalar(PrevUBVal, UB);
2052 
2053   return {LB, UB};
2054 }
2055 
2056 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2057 /// we need to use the LB and UB expressions generated by the worksharing
2058 /// code generation support, whereas in non combined situations we would
2059 /// just emit 0 and the LastIteration expression
2060 /// This function is necessary due to the difference of the LB and UB
2061 /// types for the RT emission routines for 'for_static_init' and
2062 /// 'for_dispatch_init'
2063 static std::pair<llvm::Value *, llvm::Value *>
2064 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2065                                         const OMPExecutableDirective &S,
2066                                         Address LB, Address UB) {
2067   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2068   const Expr *IVExpr = LS.getIterationVariable();
2069   // when implementing a dynamic schedule for a 'for' combined with a
2070   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2071   // is not normalized as each team only executes its own assigned
2072   // distribute chunk
2073   QualType IteratorTy = IVExpr->getType();
2074   llvm::Value *LBVal =
2075       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2076   llvm::Value *UBVal =
2077       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2078   return {LBVal, UBVal};
2079 }
2080 
2081 static void emitDistributeParallelForDistributeInnerBoundParams(
2082     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2083     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2084   const auto &Dir = cast<OMPLoopDirective>(S);
2085   LValue LB =
2086       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2087   auto LBCast = CGF.Builder.CreateIntCast(
2088       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2089   CapturedVars.push_back(LBCast);
2090   LValue UB =
2091       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2092 
2093   auto UBCast = CGF.Builder.CreateIntCast(
2094       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2095   CapturedVars.push_back(UBCast);
2096 }
2097 
2098 static void
2099 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2100                                  const OMPLoopDirective &S,
2101                                  CodeGenFunction::JumpDest LoopExit) {
2102   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2103                                          PrePostActionTy &) {
2104     bool HasCancel = false;
2105     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2106       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2107         HasCancel = D->hasCancel();
2108       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2109         HasCancel = D->hasCancel();
2110       else if (const auto *D =
2111                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2112         HasCancel = D->hasCancel();
2113     }
2114     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2115                                                      HasCancel);
2116     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2117                                emitDistributeParallelForInnerBounds,
2118                                emitDistributeParallelForDispatchBounds);
2119   };
2120 
2121   emitCommonOMPParallelDirective(
2122       CGF, S,
2123       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2124       CGInlinedWorksharingLoop,
2125       emitDistributeParallelForDistributeInnerBoundParams);
2126 }
2127 
2128 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2129     const OMPDistributeParallelForDirective &S) {
2130   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2131     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2132                               S.getDistInc());
2133   };
2134   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2135   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2136 }
2137 
2138 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2139     const OMPDistributeParallelForSimdDirective &S) {
2140   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2141     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2142                               S.getDistInc());
2143   };
2144   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2145   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2146 }
2147 
2148 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2149     const OMPDistributeSimdDirective &S) {
2150   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2151     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2152   };
2153   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2154   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2155 }
2156 
2157 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2158     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2159   // Emit SPMD target parallel for region as a standalone region.
2160   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2161     emitOMPSimdRegion(CGF, S, Action);
2162   };
2163   llvm::Function *Fn;
2164   llvm::Constant *Addr;
2165   // Emit target region as a standalone region.
2166   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2167       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2168   assert(Fn && Addr && "Target device function emission failed.");
2169 }
2170 
2171 void CodeGenFunction::EmitOMPTargetSimdDirective(
2172     const OMPTargetSimdDirective &S) {
2173   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2174     emitOMPSimdRegion(CGF, S, Action);
2175   };
2176   emitCommonOMPTargetDirective(*this, S, CodeGen);
2177 }
2178 
2179 namespace {
2180   struct ScheduleKindModifiersTy {
2181     OpenMPScheduleClauseKind Kind;
2182     OpenMPScheduleClauseModifier M1;
2183     OpenMPScheduleClauseModifier M2;
2184     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2185                             OpenMPScheduleClauseModifier M1,
2186                             OpenMPScheduleClauseModifier M2)
2187         : Kind(Kind), M1(M1), M2(M2) {}
2188   };
2189 } // namespace
2190 
2191 bool CodeGenFunction::EmitOMPWorksharingLoop(
2192     const OMPLoopDirective &S, Expr *EUB,
2193     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2194     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2195   // Emit the loop iteration variable.
2196   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2197   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2198   EmitVarDecl(*IVDecl);
2199 
2200   // Emit the iterations count variable.
2201   // If it is not a variable, Sema decided to calculate iterations count on each
2202   // iteration (e.g., it is foldable into a constant).
2203   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2204     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2205     // Emit calculation of the iterations count.
2206     EmitIgnoredExpr(S.getCalcLastIteration());
2207   }
2208 
2209   auto &RT = CGM.getOpenMPRuntime();
2210 
2211   bool HasLastprivateClause;
2212   // Check pre-condition.
2213   {
2214     OMPLoopScope PreInitScope(*this, S);
2215     // Skip the entire loop if we don't meet the precondition.
2216     // If the condition constant folds and can be elided, avoid emitting the
2217     // whole loop.
2218     bool CondConstant;
2219     llvm::BasicBlock *ContBlock = nullptr;
2220     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2221       if (!CondConstant)
2222         return false;
2223     } else {
2224       auto *ThenBlock = createBasicBlock("omp.precond.then");
2225       ContBlock = createBasicBlock("omp.precond.end");
2226       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2227                   getProfileCount(&S));
2228       EmitBlock(ThenBlock);
2229       incrementProfileCounter(&S);
2230     }
2231 
2232     bool Ordered = false;
2233     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2234       if (OrderedClause->getNumForLoops())
2235         RT.emitDoacrossInit(*this, S);
2236       else
2237         Ordered = true;
2238     }
2239 
2240     llvm::DenseSet<const Expr *> EmittedFinals;
2241     emitAlignedClause(*this, S);
2242     bool HasLinears = EmitOMPLinearClauseInit(S);
2243     // Emit helper vars inits.
2244 
2245     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2246     LValue LB = Bounds.first;
2247     LValue UB = Bounds.second;
2248     LValue ST =
2249         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2250     LValue IL =
2251         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2252 
2253     // Emit 'then' code.
2254     {
2255       OMPPrivateScope LoopScope(*this);
2256       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2257         // Emit implicit barrier to synchronize threads and avoid data races on
2258         // initialization of firstprivate variables and post-update of
2259         // lastprivate variables.
2260         CGM.getOpenMPRuntime().emitBarrierCall(
2261             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2262             /*ForceSimpleCall=*/true);
2263       }
2264       EmitOMPPrivateClause(S, LoopScope);
2265       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2266       EmitOMPReductionClauseInit(S, LoopScope);
2267       EmitOMPPrivateLoopCounters(S, LoopScope);
2268       EmitOMPLinearClause(S, LoopScope);
2269       (void)LoopScope.Privatize();
2270 
2271       // Detect the loop schedule kind and chunk.
2272       llvm::Value *Chunk = nullptr;
2273       OpenMPScheduleTy ScheduleKind;
2274       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2275         ScheduleKind.Schedule = C->getScheduleKind();
2276         ScheduleKind.M1 = C->getFirstScheduleModifier();
2277         ScheduleKind.M2 = C->getSecondScheduleModifier();
2278         if (const auto *Ch = C->getChunkSize()) {
2279           Chunk = EmitScalarExpr(Ch);
2280           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2281                                        S.getIterationVariable()->getType(),
2282                                        S.getLocStart());
2283         }
2284       }
2285       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2286       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2287       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2288       // If the static schedule kind is specified or if the ordered clause is
2289       // specified, and if no monotonic modifier is specified, the effect will
2290       // be as if the monotonic modifier was specified.
2291       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2292                                 /* Chunked */ Chunk != nullptr) &&
2293           !Ordered) {
2294         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2295           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2296         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2297         // When no chunk_size is specified, the iteration space is divided into
2298         // chunks that are approximately equal in size, and at most one chunk is
2299         // distributed to each thread. Note that the size of the chunks is
2300         // unspecified in this case.
2301         CGOpenMPRuntime::StaticRTInput StaticInit(
2302             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2303             UB.getAddress(), ST.getAddress());
2304         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2305                              ScheduleKind, StaticInit);
2306         auto LoopExit =
2307             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2308         // UB = min(UB, GlobalUB);
2309         EmitIgnoredExpr(S.getEnsureUpperBound());
2310         // IV = LB;
2311         EmitIgnoredExpr(S.getInit());
2312         // while (idx <= UB) { BODY; ++idx; }
2313         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2314                          S.getInc(),
2315                          [&S, LoopExit](CodeGenFunction &CGF) {
2316                            CGF.EmitOMPLoopBody(S, LoopExit);
2317                            CGF.EmitStopPoint(&S);
2318                          },
2319                          [](CodeGenFunction &) {});
2320         EmitBlock(LoopExit.getBlock());
2321         // Tell the runtime we are done.
2322         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2323           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2324                                                          S.getDirectiveKind());
2325         };
2326         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2327       } else {
2328         const bool IsMonotonic =
2329             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2330             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2331             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2332             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2333         // Emit the outer loop, which requests its work chunk [LB..UB] from
2334         // runtime and runs the inner loop to process it.
2335         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2336                                              ST.getAddress(), IL.getAddress(),
2337                                              Chunk, EUB);
2338         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2339                             LoopArguments, CGDispatchBounds);
2340       }
2341       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2342         EmitOMPSimdFinal(S,
2343                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2344                            return CGF.Builder.CreateIsNotNull(
2345                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2346                          });
2347       }
2348       EmitOMPReductionClauseFinal(
2349           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2350                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2351                  : /*Parallel only*/ OMPD_parallel);
2352       // Emit post-update of the reduction variables if IsLastIter != 0.
2353       emitPostUpdateForReductionClause(
2354           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2355             return CGF.Builder.CreateIsNotNull(
2356                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2357           });
2358       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2359       if (HasLastprivateClause)
2360         EmitOMPLastprivateClauseFinal(
2361             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2362             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2363     }
2364     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2365       return CGF.Builder.CreateIsNotNull(
2366           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2367     });
2368     // We're now done with the loop, so jump to the continuation block.
2369     if (ContBlock) {
2370       EmitBranch(ContBlock);
2371       EmitBlock(ContBlock, true);
2372     }
2373   }
2374   return HasLastprivateClause;
2375 }
2376 
2377 /// The following two functions generate expressions for the loop lower
2378 /// and upper bounds in case of static and dynamic (dispatch) schedule
2379 /// of the associated 'for' or 'distribute' loop.
2380 static std::pair<LValue, LValue>
2381 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2382   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2383   LValue LB =
2384       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2385   LValue UB =
2386       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2387   return {LB, UB};
2388 }
2389 
2390 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2391 /// consider the lower and upper bound expressions generated by the
2392 /// worksharing loop support, but we use 0 and the iteration space size as
2393 /// constants
2394 static std::pair<llvm::Value *, llvm::Value *>
2395 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2396                           Address LB, Address UB) {
2397   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2398   const Expr *IVExpr = LS.getIterationVariable();
2399   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2400   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2401   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2402   return {LBVal, UBVal};
2403 }
2404 
2405 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2406   bool HasLastprivates = false;
2407   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2408                                           PrePostActionTy &) {
2409     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2410     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2411                                                  emitForLoopBounds,
2412                                                  emitDispatchForLoopBounds);
2413   };
2414   {
2415     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2416     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2417                                                 S.hasCancel());
2418   }
2419 
2420   // Emit an implicit barrier at the end.
2421   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2422     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2423   }
2424 }
2425 
2426 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2427   bool HasLastprivates = false;
2428   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2429                                           PrePostActionTy &) {
2430     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2431                                                  emitForLoopBounds,
2432                                                  emitDispatchForLoopBounds);
2433   };
2434   {
2435     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2436     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2437   }
2438 
2439   // Emit an implicit barrier at the end.
2440   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2441     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2442   }
2443 }
2444 
2445 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2446                                 const Twine &Name,
2447                                 llvm::Value *Init = nullptr) {
2448   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2449   if (Init)
2450     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2451   return LVal;
2452 }
2453 
/// Emit the sections of directive \p S as an inlined OMPD_sections region.
///
/// The sections are emitted as a statically scheduled loop over a 32-bit
/// signed counter whose body is a switch with one case per section. If the
/// captured statement is not a compound statement, it is emitted as the single
/// case 0. Cancellation is taken from \p S when it is an OMPSectionsDirective
/// or an OMPParallelSectionsDirective.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *Stmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // Null if the captured statement is not a compound statement.
  const auto *CS = dyn_cast<CompoundStmt>(Stmt);
  // Set by the region callback below while the inlined directive is emitted.
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Index of the last section; 0 when there is only the single statement.
    // NOTE(review): for an empty compound statement CS->size() - 1 wraps
    // around - confirm that this case is intended or cannot occur.
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions mapped to IV and UB, so that the loop condition and
    // increment below can be expressed as AST nodes.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart(), true);
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      // The exit block doubles as the default destination of the switch.
      auto *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child statement of the compound statement.
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // The whole captured statement is the only section.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' directives are asked for a
  // cancel; for any other directive kind HasCancel stays false.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads.
    // NOTE(review): this comment used to name the initialization of
    // firstprivate variables as the reason, but the guard above tests for
    // lastprivates combined with 'nowait' - confirm the intended rationale.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2593 
2594 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2595   {
2596     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2597     EmitSections(S);
2598   }
2599   // Emit an implicit barrier at the end.
2600   if (!S.getSingleClause<OMPNowaitClause>()) {
2601     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2602                                            OMPD_sections);
2603   }
2604 }
2605 
2606 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2607   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2608     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2609   };
2610   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2611   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2612                                               S.hasCancel());
2613 }
2614 
2615 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2616   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2617   llvm::SmallVector<const Expr *, 8> DestExprs;
2618   llvm::SmallVector<const Expr *, 8> SrcExprs;
2619   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2620   // Check if there are any 'copyprivate' clauses associated with this
2621   // 'single' construct.
2622   // Build a list of copyprivate variables along with helper expressions
2623   // (<source>, <destination>, <destination>=<source> expressions)
2624   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2625     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2626     DestExprs.append(C->destination_exprs().begin(),
2627                      C->destination_exprs().end());
2628     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2629     AssignmentOps.append(C->assignment_ops().begin(),
2630                          C->assignment_ops().end());
2631   }
2632   // Emit code for 'single' region along with 'copyprivate' clauses
2633   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2634     Action.Enter(CGF);
2635     OMPPrivateScope SingleScope(CGF);
2636     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2637     CGF.EmitOMPPrivateClause(S, SingleScope);
2638     (void)SingleScope.Privatize();
2639     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2640   };
2641   {
2642     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2643     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2644                                             CopyprivateVars, DestExprs,
2645                                             SrcExprs, AssignmentOps);
2646   }
2647   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2648   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2649   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2650     CGM.getOpenMPRuntime().emitBarrierCall(
2651         *this, S.getLocStart(),
2652         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2653   }
2654 }
2655 
2656 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2657   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2658     Action.Enter(CGF);
2659     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2660   };
2661   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2662   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2663 }
2664 
2665 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2666   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2667     Action.Enter(CGF);
2668     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2669   };
2670   Expr *Hint = nullptr;
2671   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2672     Hint = HintClause->getHint();
2673   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2674   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2675                                             S.getDirectiveName().getAsString(),
2676                                             CodeGen, S.getLocStart(), Hint);
2677 }
2678 
2679 void CodeGenFunction::EmitOMPParallelForDirective(
2680     const OMPParallelForDirective &S) {
2681   // Emit directive as a combined directive that consists of two implicit
2682   // directives: 'parallel' with 'for' directive.
2683   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2684     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2685     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2686                                emitDispatchForLoopBounds);
2687   };
2688   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2689                                  emitEmptyBoundParameters);
2690 }
2691 
2692 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2693     const OMPParallelForSimdDirective &S) {
2694   // Emit directive as a combined directive that consists of two implicit
2695   // directives: 'parallel' with 'for' directive.
2696   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2697     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2698                                emitDispatchForLoopBounds);
2699   };
2700   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2701                                  emitEmptyBoundParameters);
2702 }
2703 
2704 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2705     const OMPParallelSectionsDirective &S) {
2706   // Emit directive as a combined directive that consists of two implicit
2707   // directives: 'parallel' with 'sections' directive.
2708   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2709     CGF.EmitSections(S);
2710   };
2711   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2712                                  emitEmptyBoundParameters);
2713 }
2714 
2715 void CodeGenFunction::EmitOMPTaskBasedDirective(
2716     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
2717     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
2718     OMPTaskDataTy &Data) {
2719   // Emit outlined function for task construct.
2720   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
2721   auto *I = CS->getCapturedDecl()->param_begin();
2722   auto *PartId = std::next(I);
2723   auto *TaskT = std::next(I, 4);
2724   // Check if the task is final
2725   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2726     // If the condition constant folds and can be elided, try to avoid emitting
2727     // the condition and the dead arm of the if/else.
2728     auto *Cond = Clause->getCondition();
2729     bool CondConstant;
2730     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2731       Data.Final.setInt(CondConstant);
2732     else
2733       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2734   } else {
2735     // By default the task is not final.
2736     Data.Final.setInt(/*IntVal=*/false);
2737   }
2738   // Check if the task has 'priority' clause.
2739   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2740     auto *Prio = Clause->getPriority();
2741     Data.Priority.setInt(/*IntVal=*/true);
2742     Data.Priority.setPointer(EmitScalarConversion(
2743         EmitScalarExpr(Prio), Prio->getType(),
2744         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2745         Prio->getExprLoc()));
2746   }
2747   // The first function argument for tasks is a thread id, the second one is a
2748   // part id (0 for tied tasks, >=0 for untied task).
2749   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2750   // Get list of private variables.
2751   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2752     auto IRef = C->varlist_begin();
2753     for (auto *IInit : C->private_copies()) {
2754       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2755       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2756         Data.PrivateVars.push_back(*IRef);
2757         Data.PrivateCopies.push_back(IInit);
2758       }
2759       ++IRef;
2760     }
2761   }
2762   EmittedAsPrivate.clear();
2763   // Get list of firstprivate variables.
2764   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2765     auto IRef = C->varlist_begin();
2766     auto IElemInitRef = C->inits().begin();
2767     for (auto *IInit : C->private_copies()) {
2768       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2769       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2770         Data.FirstprivateVars.push_back(*IRef);
2771         Data.FirstprivateCopies.push_back(IInit);
2772         Data.FirstprivateInits.push_back(*IElemInitRef);
2773       }
2774       ++IRef;
2775       ++IElemInitRef;
2776     }
2777   }
2778   // Get list of lastprivate variables (for taskloops).
2779   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2780   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2781     auto IRef = C->varlist_begin();
2782     auto ID = C->destination_exprs().begin();
2783     for (auto *IInit : C->private_copies()) {
2784       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2785       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2786         Data.LastprivateVars.push_back(*IRef);
2787         Data.LastprivateCopies.push_back(IInit);
2788       }
2789       LastprivateDstsOrigs.insert(
2790           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2791            cast<DeclRefExpr>(*IRef)});
2792       ++IRef;
2793       ++ID;
2794     }
2795   }
2796   SmallVector<const Expr *, 4> LHSs;
2797   SmallVector<const Expr *, 4> RHSs;
2798   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2799     auto IPriv = C->privates().begin();
2800     auto IRed = C->reduction_ops().begin();
2801     auto ILHS = C->lhs_exprs().begin();
2802     auto IRHS = C->rhs_exprs().begin();
2803     for (const auto *Ref : C->varlists()) {
2804       Data.ReductionVars.emplace_back(Ref);
2805       Data.ReductionCopies.emplace_back(*IPriv);
2806       Data.ReductionOps.emplace_back(*IRed);
2807       LHSs.emplace_back(*ILHS);
2808       RHSs.emplace_back(*IRHS);
2809       std::advance(IPriv, 1);
2810       std::advance(IRed, 1);
2811       std::advance(ILHS, 1);
2812       std::advance(IRHS, 1);
2813     }
2814   }
2815   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2816       *this, S.getLocStart(), LHSs, RHSs, Data);
2817   // Build list of dependences.
2818   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2819     for (auto *IRef : C->varlists())
2820       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2821   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
2822                     CapturedRegion](CodeGenFunction &CGF,
2823                                     PrePostActionTy &Action) {
2824     // Set proper addresses for generated private copies.
2825     OMPPrivateScope Scope(CGF);
2826     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2827         !Data.LastprivateVars.empty()) {
2828       enum { PrivatesParam = 2, CopyFnParam = 3 };
2829       auto *CopyFn = CGF.Builder.CreateLoad(
2830           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2831       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2832           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2833       // Map privates.
2834       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2835       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2836       CallArgs.push_back(PrivatesPtr);
2837       for (auto *E : Data.PrivateVars) {
2838         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2839         Address PrivatePtr = CGF.CreateMemTemp(
2840             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2841         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2842         CallArgs.push_back(PrivatePtr.getPointer());
2843       }
2844       for (auto *E : Data.FirstprivateVars) {
2845         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2846         Address PrivatePtr =
2847             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2848                               ".firstpriv.ptr.addr");
2849         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2850         CallArgs.push_back(PrivatePtr.getPointer());
2851       }
2852       for (auto *E : Data.LastprivateVars) {
2853         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2854         Address PrivatePtr =
2855             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2856                               ".lastpriv.ptr.addr");
2857         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2858         CallArgs.push_back(PrivatePtr.getPointer());
2859       }
2860       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2861                                                           CopyFn, CallArgs);
2862       for (auto &&Pair : LastprivateDstsOrigs) {
2863         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2864         DeclRefExpr DRE(
2865             const_cast<VarDecl *>(OrigVD),
2866             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2867                 OrigVD) != nullptr,
2868             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2869         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2870           return CGF.EmitLValue(&DRE).getAddress();
2871         });
2872       }
2873       for (auto &&Pair : PrivatePtrs) {
2874         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2875                             CGF.getContext().getDeclAlign(Pair.first));
2876         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2877       }
2878     }
2879     if (Data.Reductions) {
2880       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
2881       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2882                              Data.ReductionOps);
2883       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2884           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2885       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2886         RedCG.emitSharedLValue(CGF, Cnt);
2887         RedCG.emitAggregateType(CGF, Cnt);
2888         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2889             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2890         Replacement =
2891             Address(CGF.EmitScalarConversion(
2892                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2893                         CGF.getContext().getPointerType(
2894                             Data.ReductionCopies[Cnt]->getType()),
2895                         Data.ReductionCopies[Cnt]->getExprLoc()),
2896                     Replacement.getAlignment());
2897         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2898         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2899                          [Replacement]() { return Replacement; });
2900         // FIXME: This must removed once the runtime library is fixed.
2901         // Emit required threadprivate variables for
2902         // initilizer/combiner/finalizer.
2903         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2904                                                            RedCG, Cnt);
2905       }
2906     }
2907     // Privatize all private variables except for in_reduction items.
2908     (void)Scope.Privatize();
2909     SmallVector<const Expr *, 4> InRedVars;
2910     SmallVector<const Expr *, 4> InRedPrivs;
2911     SmallVector<const Expr *, 4> InRedOps;
2912     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2913     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2914       auto IPriv = C->privates().begin();
2915       auto IRed = C->reduction_ops().begin();
2916       auto ITD = C->taskgroup_descriptors().begin();
2917       for (const auto *Ref : C->varlists()) {
2918         InRedVars.emplace_back(Ref);
2919         InRedPrivs.emplace_back(*IPriv);
2920         InRedOps.emplace_back(*IRed);
2921         TaskgroupDescriptors.emplace_back(*ITD);
2922         std::advance(IPriv, 1);
2923         std::advance(IRed, 1);
2924         std::advance(ITD, 1);
2925       }
2926     }
2927     // Privatize in_reduction items here, because taskgroup descriptors must be
2928     // privatized earlier.
2929     OMPPrivateScope InRedScope(CGF);
2930     if (!InRedVars.empty()) {
2931       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2932       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2933         RedCG.emitSharedLValue(CGF, Cnt);
2934         RedCG.emitAggregateType(CGF, Cnt);
2935         // The taskgroup descriptor variable is always implicit firstprivate and
2936         // privatized already during procoessing of the firstprivates.
2937         llvm::Value *ReductionsPtr =
2938             CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
2939                                  TaskgroupDescriptors[Cnt]->getExprLoc());
2940         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2941             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2942         Replacement = Address(
2943             CGF.EmitScalarConversion(
2944                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2945                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2946                 InRedPrivs[Cnt]->getExprLoc()),
2947             Replacement.getAlignment());
2948         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2949         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2950                               [Replacement]() { return Replacement; });
2951         // FIXME: This must removed once the runtime library is fixed.
2952         // Emit required threadprivate variables for
2953         // initilizer/combiner/finalizer.
2954         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2955                                                            RedCG, Cnt);
2956       }
2957     }
2958     (void)InRedScope.Privatize();
2959 
2960     Action.Enter(CGF);
2961     BodyGen(CGF);
2962   };
2963   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2964       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2965       Data.NumberOfParts);
2966   OMPLexicalScope Scope(*this, S);
2967   TaskGen(*this, OutlinedFn, Data);
2968 }
2969 
2970 static ImplicitParamDecl *
2971 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
2972                                   QualType Ty, CapturedDecl *CD,
2973                                   SourceLocation Loc) {
2974   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2975                                            ImplicitParamDecl::Other);
2976   auto *OrigRef = DeclRefExpr::Create(
2977       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
2978       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
2979   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2980                                               ImplicitParamDecl::Other);
2981   auto *PrivateRef = DeclRefExpr::Create(
2982       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
2983       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
2984   QualType ElemType = C.getBaseElementType(Ty);
2985   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
2986                                            ImplicitParamDecl::Other);
2987   auto *InitRef = DeclRefExpr::Create(
2988       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
2989       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
2990   PrivateVD->setInitStyle(VarDecl::CInit);
2991   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
2992                                               InitRef, /*BasePath=*/nullptr,
2993                                               VK_RValue));
2994   Data.FirstprivateVars.emplace_back(OrigRef);
2995   Data.FirstprivateCopies.emplace_back(PrivateRef);
2996   Data.FirstprivateInits.emplace_back(InitRef);
2997   return OrigVD;
2998 }
2999 
3000 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
3001     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
3002     OMPTargetDataInfo &InputInfo) {
3003   // Emit outlined function for task construct.
3004   auto CS = S.getCapturedStmt(OMPD_task);
3005   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3006   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3007   auto *I = CS->getCapturedDecl()->param_begin();
3008   auto *PartId = std::next(I);
3009   auto *TaskT = std::next(I, 4);
3010   OMPTaskDataTy Data;
3011   // The task is not final.
3012   Data.Final.setInt(/*IntVal=*/false);
3013   // Get list of firstprivate variables.
3014   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3015     auto IRef = C->varlist_begin();
3016     auto IElemInitRef = C->inits().begin();
3017     for (auto *IInit : C->private_copies()) {
3018       Data.FirstprivateVars.push_back(*IRef);
3019       Data.FirstprivateCopies.push_back(IInit);
3020       Data.FirstprivateInits.push_back(*IElemInitRef);
3021       ++IRef;
3022       ++IElemInitRef;
3023     }
3024   }
3025   OMPPrivateScope TargetScope(*this);
3026   VarDecl *BPVD = nullptr;
3027   VarDecl *PVD = nullptr;
3028   VarDecl *SVD = nullptr;
3029   if (InputInfo.NumberOfTargetItems > 0) {
3030     auto *CD = CapturedDecl::Create(
3031         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
3032     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
3033     QualType BaseAndPointersType = getContext().getConstantArrayType(
3034         getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
3035         /*IndexTypeQuals=*/0);
3036     BPVD = createImplicitFirstprivateForType(
3037         getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
3038     PVD = createImplicitFirstprivateForType(
3039         getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
3040     QualType SizesType = getContext().getConstantArrayType(
3041         getContext().getSizeType(), ArrSize, ArrayType::Normal,
3042         /*IndexTypeQuals=*/0);
3043     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
3044                                             S.getLocStart());
3045     TargetScope.addPrivate(
3046         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
3047     TargetScope.addPrivate(PVD,
3048                            [&InputInfo]() { return InputInfo.PointersArray; });
3049     TargetScope.addPrivate(SVD,
3050                            [&InputInfo]() { return InputInfo.SizesArray; });
3051   }
3052   (void)TargetScope.Privatize();
3053   // Build list of dependences.
3054   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
3055     for (auto *IRef : C->varlists())
3056       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
3057   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
3058                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
3059     // Set proper addresses for generated private copies.
3060     OMPPrivateScope Scope(CGF);
3061     if (!Data.FirstprivateVars.empty()) {
3062       enum { PrivatesParam = 2, CopyFnParam = 3 };
3063       auto *CopyFn = CGF.Builder.CreateLoad(
3064           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
3065       auto *PrivatesPtr = CGF.Builder.CreateLoad(
3066           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
3067       // Map privates.
3068       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3069       llvm::SmallVector<llvm::Value *, 16> CallArgs;
3070       CallArgs.push_back(PrivatesPtr);
3071       for (auto *E : Data.FirstprivateVars) {
3072         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3073         Address PrivatePtr =
3074             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3075                               ".firstpriv.ptr.addr");
3076         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
3077         CallArgs.push_back(PrivatePtr.getPointer());
3078       }
3079       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3080                                                           CopyFn, CallArgs);
3081       for (auto &&Pair : PrivatePtrs) {
3082         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3083                             CGF.getContext().getDeclAlign(Pair.first));
3084         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
3085       }
3086     }
3087     // Privatize all private variables except for in_reduction items.
3088     (void)Scope.Privatize();
3089     if (InputInfo.NumberOfTargetItems > 0) {
3090       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
3091           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
3092       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
3093           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
3094       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
3095           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
3096     }
3097 
3098     Action.Enter(CGF);
3099     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
3100     BodyGen(CGF);
3101   };
3102   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
3103       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
3104       Data.NumberOfParts);
3105   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
3106   IntegerLiteral IfCond(getContext(), TrueOrFalse,
3107                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3108                         SourceLocation());
3109 
3110   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn,
3111                                       SharedsTy, CapturedStruct, &IfCond, Data);
3112 }
3113 
3114 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
3115   // Emit outlined function for task construct.
3116   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
3117   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3118   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3119   const Expr *IfCond = nullptr;
3120   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3121     if (C->getNameModifier() == OMPD_unknown ||
3122         C->getNameModifier() == OMPD_task) {
3123       IfCond = C->getCondition();
3124       break;
3125     }
3126   }
3127 
3128   OMPTaskDataTy Data;
3129   // Check if we should emit tied or untied task.
3130   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
3131   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3132     CGF.EmitStmt(CS->getCapturedStmt());
3133   };
3134   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
3135                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
3136                             const OMPTaskDataTy &Data) {
3137     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
3138                                             SharedsTy, CapturedStruct, IfCond,
3139                                             Data);
3140   };
3141   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
3142 }
3143 
3144 void CodeGenFunction::EmitOMPTaskyieldDirective(
3145     const OMPTaskyieldDirective &S) {
3146   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
3147 }
3148 
3149 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
3150   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
3151 }
3152 
3153 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
3154   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
3155 }
3156 
3157 void CodeGenFunction::EmitOMPTaskgroupDirective(
3158     const OMPTaskgroupDirective &S) {
3159   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3160     Action.Enter(CGF);
3161     if (const Expr *E = S.getReductionRef()) {
3162       SmallVector<const Expr *, 4> LHSs;
3163       SmallVector<const Expr *, 4> RHSs;
3164       OMPTaskDataTy Data;
3165       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
3166         auto IPriv = C->privates().begin();
3167         auto IRed = C->reduction_ops().begin();
3168         auto ILHS = C->lhs_exprs().begin();
3169         auto IRHS = C->rhs_exprs().begin();
3170         for (const auto *Ref : C->varlists()) {
3171           Data.ReductionVars.emplace_back(Ref);
3172           Data.ReductionCopies.emplace_back(*IPriv);
3173           Data.ReductionOps.emplace_back(*IRed);
3174           LHSs.emplace_back(*ILHS);
3175           RHSs.emplace_back(*IRHS);
3176           std::advance(IPriv, 1);
3177           std::advance(IRed, 1);
3178           std::advance(ILHS, 1);
3179           std::advance(IRHS, 1);
3180         }
3181       }
3182       llvm::Value *ReductionDesc =
3183           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
3184                                                            LHSs, RHSs, Data);
3185       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3186       CGF.EmitVarDecl(*VD);
3187       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
3188                             /*Volatile=*/false, E->getType());
3189     }
3190     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3191   };
3192   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3193   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3194 }
3195 
3196 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3197   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3198     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3199       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3200                                 FlushClause->varlist_end());
3201     }
3202     return llvm::None;
3203   }(), S.getLocStart());
3204 }
3205 
3206 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3207                                             const CodeGenLoopTy &CodeGenLoop,
3208                                             Expr *IncExpr) {
3209   // Emit the loop iteration variable.
3210   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3211   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3212   EmitVarDecl(*IVDecl);
3213 
3214   // Emit the iterations count variable.
3215   // If it is not a variable, Sema decided to calculate iterations count on each
3216   // iteration (e.g., it is foldable into a constant).
3217   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3218     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3219     // Emit calculation of the iterations count.
3220     EmitIgnoredExpr(S.getCalcLastIteration());
3221   }
3222 
3223   auto &RT = CGM.getOpenMPRuntime();
3224 
3225   bool HasLastprivateClause = false;
3226   // Check pre-condition.
3227   {
3228     OMPLoopScope PreInitScope(*this, S);
3229     // Skip the entire loop if we don't meet the precondition.
3230     // If the condition constant folds and can be elided, avoid emitting the
3231     // whole loop.
3232     bool CondConstant;
3233     llvm::BasicBlock *ContBlock = nullptr;
3234     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3235       if (!CondConstant)
3236         return;
3237     } else {
3238       auto *ThenBlock = createBasicBlock("omp.precond.then");
3239       ContBlock = createBasicBlock("omp.precond.end");
3240       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3241                   getProfileCount(&S));
3242       EmitBlock(ThenBlock);
3243       incrementProfileCounter(&S);
3244     }
3245 
3246     emitAlignedClause(*this, S);
3247     // Emit 'then' code.
3248     {
3249       // Emit helper vars inits.
3250 
3251       LValue LB = EmitOMPHelperVar(
3252           *this, cast<DeclRefExpr>(
3253                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3254                           ? S.getCombinedLowerBoundVariable()
3255                           : S.getLowerBoundVariable())));
3256       LValue UB = EmitOMPHelperVar(
3257           *this, cast<DeclRefExpr>(
3258                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3259                           ? S.getCombinedUpperBoundVariable()
3260                           : S.getUpperBoundVariable())));
3261       LValue ST =
3262           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3263       LValue IL =
3264           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3265 
3266       OMPPrivateScope LoopScope(*this);
3267       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3268         // Emit implicit barrier to synchronize threads and avoid data races
3269         // on initialization of firstprivate variables and post-update of
3270         // lastprivate variables.
3271         CGM.getOpenMPRuntime().emitBarrierCall(
3272             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3273             /*ForceSimpleCall=*/true);
3274       }
3275       EmitOMPPrivateClause(S, LoopScope);
3276       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3277           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3278           !isOpenMPTeamsDirective(S.getDirectiveKind()))
3279         EmitOMPReductionClauseInit(S, LoopScope);
3280       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3281       EmitOMPPrivateLoopCounters(S, LoopScope);
3282       (void)LoopScope.Privatize();
3283 
3284       // Detect the distribute schedule kind and chunk.
3285       llvm::Value *Chunk = nullptr;
3286       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3287       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3288         ScheduleKind = C->getDistScheduleKind();
3289         if (const auto *Ch = C->getChunkSize()) {
3290           Chunk = EmitScalarExpr(Ch);
3291           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3292                                        S.getIterationVariable()->getType(),
3293                                        S.getLocStart());
3294         }
3295       }
3296       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3297       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3298 
3299       // OpenMP [2.10.8, distribute Construct, Description]
3300       // If dist_schedule is specified, kind must be static. If specified,
3301       // iterations are divided into chunks of size chunk_size, chunks are
3302       // assigned to the teams of the league in a round-robin fashion in the
3303       // order of the team number. When no chunk_size is specified, the
3304       // iteration space is divided into chunks that are approximately equal
3305       // in size, and at most one chunk is distributed to each team of the
3306       // league. The size of the chunks is unspecified in this case.
3307       if (RT.isStaticNonchunked(ScheduleKind,
3308                                 /* Chunked */ Chunk != nullptr)) {
3309         if (isOpenMPSimdDirective(S.getDirectiveKind()))
3310           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
3311         CGOpenMPRuntime::StaticRTInput StaticInit(
3312             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3313             LB.getAddress(), UB.getAddress(), ST.getAddress());
3314         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3315                                     StaticInit);
3316         auto LoopExit =
3317             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3318         // UB = min(UB, GlobalUB);
3319         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3320                             ? S.getCombinedEnsureUpperBound()
3321                             : S.getEnsureUpperBound());
3322         // IV = LB;
3323         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3324                             ? S.getCombinedInit()
3325                             : S.getInit());
3326 
3327         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3328                          ? S.getCombinedCond()
3329                          : S.getCond();
3330 
3331         // for distribute alone,  codegen
3332         // while (idx <= UB) { BODY; ++idx; }
3333         // when combined with 'for' (e.g. as in 'distribute parallel for')
3334         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3335         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3336                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3337                            CodeGenLoop(CGF, S, LoopExit);
3338                          },
3339                          [](CodeGenFunction &) {});
3340         EmitBlock(LoopExit.getBlock());
3341         // Tell the runtime we are done.
3342         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3343       } else {
3344         // Emit the outer loop, which requests its work chunk [LB..UB] from
3345         // runtime and runs the inner loop to process it.
3346         const OMPLoopArguments LoopArguments = {
3347             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3348             Chunk};
3349         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3350                                    CodeGenLoop);
3351       }
3352       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3353         EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3354           return CGF.Builder.CreateIsNotNull(
3355               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3356         });
3357       }
3358       OpenMPDirectiveKind ReductionKind = OMPD_unknown;
3359       if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
3360           isOpenMPSimdDirective(S.getDirectiveKind())) {
3361         ReductionKind = OMPD_parallel_for_simd;
3362       } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3363         ReductionKind = OMPD_parallel_for;
3364       } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3365         ReductionKind = OMPD_simd;
3366       } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
3367                  S.hasClausesOfKind<OMPReductionClause>()) {
3368         llvm_unreachable(
3369             "No reduction clauses is allowed in distribute directive.");
3370       }
3371       EmitOMPReductionClauseFinal(S, ReductionKind);
3372       // Emit post-update of the reduction variables if IsLastIter != 0.
3373       emitPostUpdateForReductionClause(
3374           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3375             return CGF.Builder.CreateIsNotNull(
3376                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3377           });
3378       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3379       if (HasLastprivateClause) {
3380         EmitOMPLastprivateClauseFinal(
3381             S, /*NoFinals=*/false,
3382             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
3383       }
3384     }
3385 
3386     // We're now done with the loop, so jump to the continuation block.
3387     if (ContBlock) {
3388       EmitBranch(ContBlock);
3389       EmitBlock(ContBlock, true);
3390     }
3391   }
3392 }
3393 
3394 void CodeGenFunction::EmitOMPDistributeDirective(
3395     const OMPDistributeDirective &S) {
3396   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3397 
3398     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3399   };
3400   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3401   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3402 }
3403 
3404 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3405                                                    const CapturedStmt *S) {
3406   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3407   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3408   CGF.CapturedStmtInfo = &CapStmtInfo;
3409   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3410   Fn->addFnAttr(llvm::Attribute::NoInline);
3411   return Fn;
3412 }
3413 
3414 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3415   if (S.hasClausesOfKind<OMPDependClause>()) {
3416     assert(!S.getAssociatedStmt() &&
3417            "No associated statement must be in ordered depend construct.");
3418     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3419       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3420     return;
3421   }
3422   auto *C = S.getSingleClause<OMPSIMDClause>();
3423   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3424                                  PrePostActionTy &Action) {
3425     const CapturedStmt *CS = S.getInnermostCapturedStmt();
3426     if (C) {
3427       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3428       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3429       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3430       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3431                                                       OutlinedFn, CapturedVars);
3432     } else {
3433       Action.Enter(CGF);
3434       CGF.EmitStmt(CS->getCapturedStmt());
3435     }
3436   };
3437   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3438   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3439 }
3440 
3441 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3442                                          QualType SrcType, QualType DestType,
3443                                          SourceLocation Loc) {
3444   assert(CGF.hasScalarEvaluationKind(DestType) &&
3445          "DestType must have scalar evaluation kind.");
3446   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3447   return Val.isScalar()
3448              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3449                                         Loc)
3450              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3451                                                  DestType, Loc);
3452 }
3453 
3454 static CodeGenFunction::ComplexPairTy
3455 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3456                       QualType DestType, SourceLocation Loc) {
3457   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3458          "DestType must have complex evaluation kind.");
3459   CodeGenFunction::ComplexPairTy ComplexVal;
3460   if (Val.isScalar()) {
3461     // Convert the input element to the element type of the complex.
3462     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3463     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3464                                               DestElementType, Loc);
3465     ComplexVal = CodeGenFunction::ComplexPairTy(
3466         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3467   } else {
3468     assert(Val.isComplex() && "Must be a scalar or complex.");
3469     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3470     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3471     ComplexVal.first = CGF.EmitScalarConversion(
3472         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3473     ComplexVal.second = CGF.EmitScalarConversion(
3474         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3475   }
3476   return ComplexVal;
3477 }
3478 
3479 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3480                                   LValue LVal, RValue RVal) {
3481   if (LVal.isGlobalReg()) {
3482     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3483   } else {
3484     CGF.EmitAtomicStore(RVal, LVal,
3485                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3486                                  : llvm::AtomicOrdering::Monotonic,
3487                         LVal.isVolatile(), /*IsInit=*/false);
3488   }
3489 }
3490 
3491 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3492                                          QualType RValTy, SourceLocation Loc) {
3493   switch (getEvaluationKind(LVal.getType())) {
3494   case TEK_Scalar:
3495     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3496                                *this, RVal, RValTy, LVal.getType(), Loc)),
3497                            LVal);
3498     break;
3499   case TEK_Complex:
3500     EmitStoreOfComplex(
3501         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3502         /*isInit=*/false);
3503     break;
3504   case TEK_Aggregate:
3505     llvm_unreachable("Must be a scalar or complex.");
3506   }
3507 }
3508 
3509 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3510                                   const Expr *X, const Expr *V,
3511                                   SourceLocation Loc) {
3512   // v = x;
3513   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3514   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3515   LValue XLValue = CGF.EmitLValue(X);
3516   LValue VLValue = CGF.EmitLValue(V);
3517   RValue Res = XLValue.isGlobalReg()
3518                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3519                    : CGF.EmitAtomicLoad(
3520                          XLValue, Loc,
3521                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3522                                   : llvm::AtomicOrdering::Monotonic,
3523                          XLValue.isVolatile());
3524   // OpenMP, 2.12.6, atomic Construct
3525   // Any atomic construct with a seq_cst clause forces the atomically
3526   // performed operation to include an implicit flush operation without a
3527   // list.
3528   if (IsSeqCst)
3529     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3530   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3531 }
3532 
3533 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3534                                    const Expr *X, const Expr *E,
3535                                    SourceLocation Loc) {
3536   // x = expr;
3537   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3538   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3539   // OpenMP, 2.12.6, atomic Construct
3540   // Any atomic construct with a seq_cst clause forces the atomically
3541   // performed operation to include an implicit flush operation without a
3542   // list.
3543   if (IsSeqCst)
3544     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3545 }
3546 
3547 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3548                                                 RValue Update,
3549                                                 BinaryOperatorKind BO,
3550                                                 llvm::AtomicOrdering AO,
3551                                                 bool IsXLHSInRHSPart) {
3552   auto &Context = CGF.CGM.getContext();
3553   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3554   // expression is simple and atomic is allowed for the given type for the
3555   // target platform.
3556   if (BO == BO_Comma || !Update.isScalar() ||
3557       !Update.getScalarVal()->getType()->isIntegerTy() ||
3558       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3559                         (Update.getScalarVal()->getType() !=
3560                          X.getAddress().getElementType())) ||
3561       !X.getAddress().getElementType()->isIntegerTy() ||
3562       !Context.getTargetInfo().hasBuiltinAtomic(
3563           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3564     return std::make_pair(false, RValue::get(nullptr));
3565 
3566   llvm::AtomicRMWInst::BinOp RMWOp;
3567   switch (BO) {
3568   case BO_Add:
3569     RMWOp = llvm::AtomicRMWInst::Add;
3570     break;
3571   case BO_Sub:
3572     if (!IsXLHSInRHSPart)
3573       return std::make_pair(false, RValue::get(nullptr));
3574     RMWOp = llvm::AtomicRMWInst::Sub;
3575     break;
3576   case BO_And:
3577     RMWOp = llvm::AtomicRMWInst::And;
3578     break;
3579   case BO_Or:
3580     RMWOp = llvm::AtomicRMWInst::Or;
3581     break;
3582   case BO_Xor:
3583     RMWOp = llvm::AtomicRMWInst::Xor;
3584     break;
3585   case BO_LT:
3586     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3587                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3588                                    : llvm::AtomicRMWInst::Max)
3589                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3590                                    : llvm::AtomicRMWInst::UMax);
3591     break;
3592   case BO_GT:
3593     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3594                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3595                                    : llvm::AtomicRMWInst::Min)
3596                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3597                                    : llvm::AtomicRMWInst::UMin);
3598     break;
3599   case BO_Assign:
3600     RMWOp = llvm::AtomicRMWInst::Xchg;
3601     break;
3602   case BO_Mul:
3603   case BO_Div:
3604   case BO_Rem:
3605   case BO_Shl:
3606   case BO_Shr:
3607   case BO_LAnd:
3608   case BO_LOr:
3609     return std::make_pair(false, RValue::get(nullptr));
3610   case BO_PtrMemD:
3611   case BO_PtrMemI:
3612   case BO_LE:
3613   case BO_GE:
3614   case BO_EQ:
3615   case BO_NE:
3616   case BO_Cmp:
3617   case BO_AddAssign:
3618   case BO_SubAssign:
3619   case BO_AndAssign:
3620   case BO_OrAssign:
3621   case BO_XorAssign:
3622   case BO_MulAssign:
3623   case BO_DivAssign:
3624   case BO_RemAssign:
3625   case BO_ShlAssign:
3626   case BO_ShrAssign:
3627   case BO_Comma:
3628     llvm_unreachable("Unsupported atomic update operation");
3629   }
3630   auto *UpdateVal = Update.getScalarVal();
3631   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3632     UpdateVal = CGF.Builder.CreateIntCast(
3633         IC, X.getAddress().getElementType(),
3634         X.getType()->hasSignedIntegerRepresentation());
3635   }
3636   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3637   return std::make_pair(true, RValue::get(Res));
3638 }
3639 
3640 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3641     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3642     llvm::AtomicOrdering AO, SourceLocation Loc,
3643     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3644   // Update expressions are allowed to have the following forms:
3645   // x binop= expr; -> xrval + expr;
3646   // x++, ++x -> xrval + 1;
3647   // x--, --x -> xrval - 1;
3648   // x = x binop expr; -> xrval binop expr
3649   // x = expr Op x; - > expr binop xrval;
3650   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3651   if (!Res.first) {
3652     if (X.isGlobalReg()) {
3653       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3654       // 'xrval'.
3655       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3656     } else {
3657       // Perform compare-and-swap procedure.
3658       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3659     }
3660   }
3661   return Res;
3662 }
3663 
3664 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3665                                     const Expr *X, const Expr *E,
3666                                     const Expr *UE, bool IsXLHSInRHSPart,
3667                                     SourceLocation Loc) {
3668   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3669          "Update expr in 'atomic update' must be a binary operator.");
3670   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3671   // Update expressions are allowed to have the following forms:
3672   // x binop= expr; -> xrval + expr;
3673   // x++, ++x -> xrval + 1;
3674   // x--, --x -> xrval - 1;
3675   // x = x binop expr; -> xrval binop expr
3676   // x = expr Op x; - > expr binop xrval;
3677   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3678   LValue XLValue = CGF.EmitLValue(X);
3679   RValue ExprRValue = CGF.EmitAnyExpr(E);
3680   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3681                      : llvm::AtomicOrdering::Monotonic;
3682   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3683   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3684   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3685   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3686   auto Gen =
3687       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3688         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3689         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3690         return CGF.EmitAnyExpr(UE);
3691       };
3692   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3693       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3694   // OpenMP, 2.12.6, atomic Construct
3695   // Any atomic construct with a seq_cst clause forces the atomically
3696   // performed operation to include an implicit flush operation without a
3697   // list.
3698   if (IsSeqCst)
3699     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3700 }
3701 
3702 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3703                             QualType SourceType, QualType ResType,
3704                             SourceLocation Loc) {
3705   switch (CGF.getEvaluationKind(ResType)) {
3706   case TEK_Scalar:
3707     return RValue::get(
3708         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3709   case TEK_Complex: {
3710     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3711     return RValue::getComplex(Res.first, Res.second);
3712   }
3713   case TEK_Aggregate:
3714     break;
3715   }
3716   llvm_unreachable("Must be a scalar or complex.");
3717 }
3718 
/// Emit an 'omp atomic capture' construct: atomically update or overwrite 'x'
/// and store a value of 'x' to 'v'.
///
/// \param IsSeqCst true if the construct has a 'seq_cst' clause; selects
/// sequentially consistent ordering and adds an implicit flush at the end.
/// \param IsPostfixUpdate selects whether 'v' receives the old value of 'x'
/// (true) or the new one (false).
/// \param V lvalue expression that receives the captured value.
/// \param X lvalue expression that is updated atomically.
/// \param E the 'expr' operand of the update.
/// \param UE update expression (a binary operator over opaque values), or
/// null when 'x' is simply overwritten with 'expr'.
/// \param IsXLHSInRHSPart true if 'x' is the left operand of \p UE.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value that is finally stored to 'v'; filled in either by the update
  // callbacks below or from the 'atomicrmw' result.
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // Type of NewVVal, needed to convert it to the type of 'v'.
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Fallback generator: computes the new 'x' from a given old value and
    // records the value to capture (old for postfix, new otherwise).
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    // Convert 'expr' to the type of 'x' once, before any exchange is emitted.
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // NOTE(review): this fallback always captures the old value of 'x',
    // whereas the 'atomicrmw' path below selects old/new by IsPostfixUpdate;
    // confirm that the difference is intended.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3799 
/// Dispatch codegen of an 'omp atomic' construct on its clause kind.
///
/// \param Kind the clause selecting the atomic form: 'read', 'write',
/// 'update' or 'capture'. OMPC_unknown is handled like 'update'. Every other
/// clause kind is unreachable.
/// \param IsSeqCst true if the construct has a 'seq_cst' clause.
/// \param IsPostfixUpdate forwarded to the 'capture' form only.
/// \param X the 'x' expression of the construct.
/// \param V the 'v' expression (used by 'read' and 'capture').
/// \param E the 'expr' expression (used by 'write', 'update' and 'capture').
/// \param UE the update expression (used by 'update' and 'capture').
/// \param IsXLHSInRHSPart forwarded to the 'update' and 'capture' forms.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  // No form-selecting clause at all is treated the same as 'update'.
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  // All remaining clause kinds are listed explicitly so the switch stays
  // exhaustive over OpenMPClauseKind; none of them may reach this function.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
3869 
3870 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3871   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3872   OpenMPClauseKind Kind = OMPC_unknown;
3873   for (auto *C : S.clauses()) {
3874     // Find first clause (skip seq_cst clause, if it is first).
3875     if (C->getClauseKind() != OMPC_seq_cst) {
3876       Kind = C->getClauseKind();
3877       break;
3878     }
3879   }
3880 
3881   const auto *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
3882   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3883     enterFullExpression(EWC);
3884   }
3885   // Processing for statements under 'atomic capture'.
3886   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3887     for (const auto *C : Compound->body()) {
3888       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3889         enterFullExpression(EWC);
3890       }
3891     }
3892   }
3893 
3894   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3895                                             PrePostActionTy &) {
3896     CGF.EmitStopPoint(CS);
3897     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3898                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3899                       S.isXLHSInRHSPart(), S.getLocStart());
3900   };
3901   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3902   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3903 }
3904 
3905 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3906                                          const OMPExecutableDirective &S,
3907                                          const RegionCodeGenTy &CodeGen) {
3908   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3909   CodeGenModule &CGM = CGF.CGM;
3910 
3911   llvm::Function *Fn = nullptr;
3912   llvm::Constant *FnID = nullptr;
3913 
3914   const Expr *IfCond = nullptr;
3915   // Check for the at most one if clause associated with the target region.
3916   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3917     if (C->getNameModifier() == OMPD_unknown ||
3918         C->getNameModifier() == OMPD_target) {
3919       IfCond = C->getCondition();
3920       break;
3921     }
3922   }
3923 
3924   // Check if we have any device clause associated with the directive.
3925   const Expr *Device = nullptr;
3926   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3927     Device = C->getDevice();
3928   }
3929 
3930   // Check if we have an if clause whose conditional always evaluates to false
3931   // or if we do not have any targets specified. If so the target region is not
3932   // an offload entry point.
3933   bool IsOffloadEntry = true;
3934   if (IfCond) {
3935     bool Val;
3936     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3937       IsOffloadEntry = false;
3938   }
3939   if (CGM.getLangOpts().OMPTargetTriples.empty())
3940     IsOffloadEntry = false;
3941 
3942   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3943   StringRef ParentName;
3944   // In case we have Ctors/Dtors we use the complete type variant to produce
3945   // the mangling of the device outlined kernel.
3946   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3947     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3948   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3949     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3950   else
3951     ParentName =
3952         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3953 
3954   // Emit target region as a standalone region.
3955   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3956                                                     IsOffloadEntry, CodeGen);
3957   OMPLexicalScope Scope(CGF, S, OMPD_task);
3958   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
3959 }
3960 
3961 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3962                              PrePostActionTy &Action) {
3963   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3964   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3965   CGF.EmitOMPPrivateClause(S, PrivateScope);
3966   (void)PrivateScope.Privatize();
3967 
3968   Action.Enter(CGF);
3969   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
3970 }
3971 
3972 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3973                                                   StringRef ParentName,
3974                                                   const OMPTargetDirective &S) {
3975   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3976     emitTargetRegion(CGF, S, Action);
3977   };
3978   llvm::Function *Fn;
3979   llvm::Constant *Addr;
3980   // Emit target region as a standalone region.
3981   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3982       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3983   assert(Fn && Addr && "Target device function emission failed.");
3984 }
3985 
3986 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3987   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3988     emitTargetRegion(CGF, S, Action);
3989   };
3990   emitCommonOMPTargetDirective(*this, S, CodeGen);
3991 }
3992 
3993 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3994                                         const OMPExecutableDirective &S,
3995                                         OpenMPDirectiveKind InnermostKind,
3996                                         const RegionCodeGenTy &CodeGen) {
3997   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3998   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3999       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
4000 
4001   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
4002   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
4003   if (NT || TL) {
4004     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
4005     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
4006 
4007     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
4008                                                   S.getLocStart());
4009   }
4010 
4011   OMPTeamsScope Scope(CGF, S);
4012   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4013   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4014   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
4015                                            CapturedVars);
4016 }
4017 
4018 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
4019   // Emit teams region as a standalone region.
4020   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4021     OMPPrivateScope PrivateScope(CGF);
4022     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4023     CGF.EmitOMPPrivateClause(S, PrivateScope);
4024     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4025     (void)PrivateScope.Privatize();
4026     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
4027     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4028   };
4029   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4030   emitPostUpdateForReductionClause(
4031       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4032 }
4033 
4034 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4035                                   const OMPTargetTeamsDirective &S) {
4036   auto *CS = S.getCapturedStmt(OMPD_teams);
4037   Action.Enter(CGF);
4038   // Emit teams region as a standalone region.
4039   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
4040     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4041     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4042     CGF.EmitOMPPrivateClause(S, PrivateScope);
4043     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4044     (void)PrivateScope.Privatize();
4045     Action.Enter(CGF);
4046     CGF.EmitStmt(CS->getCapturedStmt());
4047     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4048   };
4049   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
4050   emitPostUpdateForReductionClause(
4051       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4052 }
4053 
4054 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
4055     CodeGenModule &CGM, StringRef ParentName,
4056     const OMPTargetTeamsDirective &S) {
4057   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4058     emitTargetTeamsRegion(CGF, Action, S);
4059   };
4060   llvm::Function *Fn;
4061   llvm::Constant *Addr;
4062   // Emit target region as a standalone region.
4063   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4064       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4065   assert(Fn && Addr && "Target device function emission failed.");
4066 }
4067 
4068 void CodeGenFunction::EmitOMPTargetTeamsDirective(
4069     const OMPTargetTeamsDirective &S) {
4070   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4071     emitTargetTeamsRegion(CGF, Action, S);
4072   };
4073   emitCommonOMPTargetDirective(*this, S, CodeGen);
4074 }
4075 
4076 static void
4077 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4078                                 const OMPTargetTeamsDistributeDirective &S) {
4079   Action.Enter(CGF);
4080   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4081     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4082   };
4083 
4084   // Emit teams region as a standalone region.
4085   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4086                                             PrePostActionTy &) {
4087     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4088     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4089     (void)PrivateScope.Privatize();
4090     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4091                                                     CodeGenDistribute);
4092     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4093   };
4094   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
4095   emitPostUpdateForReductionClause(CGF, S,
4096                                    [](CodeGenFunction &) { return nullptr; });
4097 }
4098 
4099 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
4100     CodeGenModule &CGM, StringRef ParentName,
4101     const OMPTargetTeamsDistributeDirective &S) {
4102   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4103     emitTargetTeamsDistributeRegion(CGF, Action, S);
4104   };
4105   llvm::Function *Fn;
4106   llvm::Constant *Addr;
4107   // Emit target region as a standalone region.
4108   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4109       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4110   assert(Fn && Addr && "Target device function emission failed.");
4111 }
4112 
4113 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
4114     const OMPTargetTeamsDistributeDirective &S) {
4115   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4116     emitTargetTeamsDistributeRegion(CGF, Action, S);
4117   };
4118   emitCommonOMPTargetDirective(*this, S, CodeGen);
4119 }
4120 
4121 static void emitTargetTeamsDistributeSimdRegion(
4122     CodeGenFunction &CGF, PrePostActionTy &Action,
4123     const OMPTargetTeamsDistributeSimdDirective &S) {
4124   Action.Enter(CGF);
4125   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4126     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4127   };
4128 
4129   // Emit teams region as a standalone region.
4130   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4131                                             PrePostActionTy &) {
4132     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4133     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4134     (void)PrivateScope.Privatize();
4135     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4136                                                     CodeGenDistribute);
4137     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4138   };
4139   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
4140   emitPostUpdateForReductionClause(CGF, S,
4141                                    [](CodeGenFunction &) { return nullptr; });
4142 }
4143 
4144 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
4145     CodeGenModule &CGM, StringRef ParentName,
4146     const OMPTargetTeamsDistributeSimdDirective &S) {
4147   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4148     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4149   };
4150   llvm::Function *Fn;
4151   llvm::Constant *Addr;
4152   // Emit target region as a standalone region.
4153   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4154       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4155   assert(Fn && Addr && "Target device function emission failed.");
4156 }
4157 
4158 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
4159     const OMPTargetTeamsDistributeSimdDirective &S) {
4160   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4161     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4162   };
4163   emitCommonOMPTargetDirective(*this, S, CodeGen);
4164 }
4165 
4166 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
4167     const OMPTeamsDistributeDirective &S) {
4168 
4169   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4170     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4171   };
4172 
4173   // Emit teams region as a standalone region.
4174   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4175                                             PrePostActionTy &) {
4176     OMPPrivateScope PrivateScope(CGF);
4177     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4178     (void)PrivateScope.Privatize();
4179     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4180                                                     CodeGenDistribute);
4181     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4182   };
4183   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4184   emitPostUpdateForReductionClause(*this, S,
4185                                    [](CodeGenFunction &) { return nullptr; });
4186 }
4187 
4188 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
4189     const OMPTeamsDistributeSimdDirective &S) {
4190   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4191     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4192   };
4193 
4194   // Emit teams region as a standalone region.
4195   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4196                                             PrePostActionTy &) {
4197     OMPPrivateScope PrivateScope(CGF);
4198     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4199     (void)PrivateScope.Privatize();
4200     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
4201                                                     CodeGenDistribute);
4202     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4203   };
4204   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
4205   emitPostUpdateForReductionClause(*this, S,
4206                                    [](CodeGenFunction &) { return nullptr; });
4207 }
4208 
4209 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
4210     const OMPTeamsDistributeParallelForDirective &S) {
4211   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4212     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4213                               S.getDistInc());
4214   };
4215 
4216   // Emit teams region as a standalone region.
4217   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4218                                             PrePostActionTy &) {
4219     OMPPrivateScope PrivateScope(CGF);
4220     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4221     (void)PrivateScope.Privatize();
4222     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4223                                                     CodeGenDistribute);
4224     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4225   };
4226   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4227   emitPostUpdateForReductionClause(*this, S,
4228                                    [](CodeGenFunction &) { return nullptr; });
4229 }
4230 
4231 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
4232     const OMPTeamsDistributeParallelForSimdDirective &S) {
4233   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4234     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4235                               S.getDistInc());
4236   };
4237 
4238   // Emit teams region as a standalone region.
4239   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4240                                             PrePostActionTy &) {
4241     OMPPrivateScope PrivateScope(CGF);
4242     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4243     (void)PrivateScope.Privatize();
4244     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4245         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4246     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4247   };
4248   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4249   emitPostUpdateForReductionClause(*this, S,
4250                                    [](CodeGenFunction &) { return nullptr; });
4251 }
4252 
4253 static void emitTargetTeamsDistributeParallelForRegion(
4254     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
4255     PrePostActionTy &Action) {
4256   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4257     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4258                               S.getDistInc());
4259   };
4260 
4261   // Emit teams region as a standalone region.
4262   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4263                                                  PrePostActionTy &) {
4264     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4265     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4266     (void)PrivateScope.Privatize();
4267     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4268         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4269     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4270   };
4271 
4272   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
4273                               CodeGenTeams);
4274   emitPostUpdateForReductionClause(CGF, S,
4275                                    [](CodeGenFunction &) { return nullptr; });
4276 }
4277 
4278 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
4279     CodeGenModule &CGM, StringRef ParentName,
4280     const OMPTargetTeamsDistributeParallelForDirective &S) {
4281   // Emit SPMD target teams distribute parallel for region as a standalone
4282   // region.
4283   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4284     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4285   };
4286   llvm::Function *Fn;
4287   llvm::Constant *Addr;
4288   // Emit target region as a standalone region.
4289   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4290       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4291   assert(Fn && Addr && "Target device function emission failed.");
4292 }
4293 
4294 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
4295     const OMPTargetTeamsDistributeParallelForDirective &S) {
4296   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4297     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4298   };
4299   emitCommonOMPTargetDirective(*this, S, CodeGen);
4300 }
4301 
4302 static void emitTargetTeamsDistributeParallelForSimdRegion(
4303     CodeGenFunction &CGF,
4304     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
4305     PrePostActionTy &Action) {
4306   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4307     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4308                               S.getDistInc());
4309   };
4310 
4311   // Emit teams region as a standalone region.
4312   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4313                                                  PrePostActionTy &) {
4314     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4315     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4316     (void)PrivateScope.Privatize();
4317     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4318         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4319     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4320   };
4321 
4322   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
4323                               CodeGenTeams);
4324   emitPostUpdateForReductionClause(CGF, S,
4325                                    [](CodeGenFunction &) { return nullptr; });
4326 }
4327 
4328 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
4329     CodeGenModule &CGM, StringRef ParentName,
4330     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4331   // Emit SPMD target teams distribute parallel for simd region as a standalone
4332   // region.
4333   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4334     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4335   };
4336   llvm::Function *Fn;
4337   llvm::Constant *Addr;
4338   // Emit target region as a standalone region.
4339   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4340       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4341   assert(Fn && Addr && "Target device function emission failed.");
4342 }
4343 
4344 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
4345     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4346   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4347     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4348   };
4349   emitCommonOMPTargetDirective(*this, S, CodeGen);
4350 }
4351 
4352 void CodeGenFunction::EmitOMPCancellationPointDirective(
4353     const OMPCancellationPointDirective &S) {
4354   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
4355                                                    S.getCancelRegion());
4356 }
4357 
4358 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
4359   const Expr *IfCond = nullptr;
4360   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4361     if (C->getNameModifier() == OMPD_unknown ||
4362         C->getNameModifier() == OMPD_cancel) {
4363       IfCond = C->getCondition();
4364       break;
4365     }
4366   }
4367   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
4368                                         S.getCancelRegion());
4369 }
4370 
4371 CodeGenFunction::JumpDest
4372 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
4373   if (Kind == OMPD_parallel || Kind == OMPD_task ||
4374       Kind == OMPD_target_parallel)
4375     return ReturnBlock;
4376   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
4377          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
4378          Kind == OMPD_distribute_parallel_for ||
4379          Kind == OMPD_target_parallel_for ||
4380          Kind == OMPD_teams_distribute_parallel_for ||
4381          Kind == OMPD_target_teams_distribute_parallel_for);
4382   return OMPCancelStack.getExitBlock();
4383 }
4384 
4385 void CodeGenFunction::EmitOMPUseDevicePtrClause(
4386     const OMPClause &NC, OMPPrivateScope &PrivateScope,
4387     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
4388   const auto &C = cast<OMPUseDevicePtrClause>(NC);
4389   auto OrigVarIt = C.varlist_begin();
4390   auto InitIt = C.inits().begin();
4391   for (auto PvtVarIt : C.private_copies()) {
4392     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
4393     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
4394     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
4395 
4396     // In order to identify the right initializer we need to match the
4397     // declaration used by the mapping logic. In some cases we may get
4398     // OMPCapturedExprDecl that refers to the original declaration.
4399     const ValueDecl *MatchingVD = OrigVD;
4400     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
4401       // OMPCapturedExprDecl are used to privative fields of the current
4402       // structure.
4403       auto *ME = cast<MemberExpr>(OED->getInit());
4404       assert(isa<CXXThisExpr>(ME->getBase()) &&
4405              "Base should be the current struct!");
4406       MatchingVD = ME->getMemberDecl();
4407     }
4408 
4409     // If we don't have information about the current list item, move on to
4410     // the next one.
4411     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
4412     if (InitAddrIt == CaptureDeviceAddrMap.end())
4413       continue;
4414 
4415     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
4416       // Initialize the temporary initialization variable with the address we
4417       // get from the runtime library. We have to cast the source address
4418       // because it is always a void *. References are materialized in the
4419       // privatization scope, so the initialization here disregards the fact
4420       // the original variable is a reference.
4421       QualType AddrQTy =
4422           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
4423       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
4424       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
4425       setAddrOfLocalVar(InitVD, InitAddr);
4426 
4427       // Emit private declaration, it will be initialized by the value we
4428       // declaration we just added to the local declarations map.
4429       EmitDecl(*PvtVD);
4430 
4431       // The initialization variables reached its purpose in the emission
4432       // ofthe previous declaration, so we don't need it anymore.
4433       LocalDeclMap.erase(InitVD);
4434 
4435       // Return the address of the private variable.
4436       return GetAddrOfLocalVar(PvtVD);
4437     });
4438     assert(IsRegistered && "firstprivate var already registered as private");
4439     // Silence the warning about unused variable.
4440     (void)IsRegistered;
4441 
4442     ++OrigVarIt;
4443     ++InitIt;
4444   }
4445 }
4446 
4447 // Generate the instructions for '#pragma omp target data' directive.
4448 void CodeGenFunction::EmitOMPTargetDataDirective(
4449     const OMPTargetDataDirective &S) {
4450   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
4451 
4452   // Create a pre/post action to signal the privatization of the device pointer.
4453   // This action can be replaced by the OpenMP runtime code generation to
4454   // deactivate privatization.
4455   bool PrivatizeDevicePointers = false;
4456   class DevicePointerPrivActionTy : public PrePostActionTy {
4457     bool &PrivatizeDevicePointers;
4458 
4459   public:
4460     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
4461         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
4462     void Enter(CodeGenFunction &CGF) override {
4463       PrivatizeDevicePointers = true;
4464     }
4465   };
4466   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
4467 
4468   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
4469                        CodeGenFunction &CGF, PrePostActionTy &Action) {
4470     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4471       CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4472     };
4473 
4474     // Codegen that selects wheather to generate the privatization code or not.
4475     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
4476                           &InnermostCodeGen](CodeGenFunction &CGF,
4477                                              PrePostActionTy &Action) {
4478       RegionCodeGenTy RCG(InnermostCodeGen);
4479       PrivatizeDevicePointers = false;
4480 
4481       // Call the pre-action to change the status of PrivatizeDevicePointers if
4482       // needed.
4483       Action.Enter(CGF);
4484 
4485       if (PrivatizeDevicePointers) {
4486         OMPPrivateScope PrivateScope(CGF);
4487         // Emit all instances of the use_device_ptr clause.
4488         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
4489           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
4490                                         Info.CaptureDeviceAddrMap);
4491         (void)PrivateScope.Privatize();
4492         RCG(CGF);
4493       } else
4494         RCG(CGF);
4495     };
4496 
4497     // Forward the provided action to the privatization codegen.
4498     RegionCodeGenTy PrivRCG(PrivCodeGen);
4499     PrivRCG.setAction(Action);
4500 
4501     // Notwithstanding the body of the region is emitted as inlined directive,
4502     // we don't use an inline scope as changes in the references inside the
4503     // region are expected to be visible outside, so we do not privative them.
4504     OMPLexicalScope Scope(CGF, S);
4505     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
4506                                                     PrivRCG);
4507   };
4508 
4509   RegionCodeGenTy RCG(CodeGen);
4510 
4511   // If we don't have target devices, don't bother emitting the data mapping
4512   // code.
4513   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
4514     RCG(*this);
4515     return;
4516   }
4517 
4518   // Check if we have any if clause associated with the directive.
4519   const Expr *IfCond = nullptr;
4520   if (auto *C = S.getSingleClause<OMPIfClause>())
4521     IfCond = C->getCondition();
4522 
4523   // Check if we have any device clause associated with the directive.
4524   const Expr *Device = nullptr;
4525   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4526     Device = C->getDevice();
4527 
4528   // Set the action to signal privatization of device pointers.
4529   RCG.setAction(PrivAction);
4530 
4531   // Emit region code.
4532   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4533                                              Info);
4534 }
4535 
4536 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4537     const OMPTargetEnterDataDirective &S) {
4538   // If we don't have target devices, don't bother emitting the data mapping
4539   // code.
4540   if (CGM.getLangOpts().OMPTargetTriples.empty())
4541     return;
4542 
4543   // Check if we have any if clause associated with the directive.
4544   const Expr *IfCond = nullptr;
4545   if (auto *C = S.getSingleClause<OMPIfClause>())
4546     IfCond = C->getCondition();
4547 
4548   // Check if we have any device clause associated with the directive.
4549   const Expr *Device = nullptr;
4550   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4551     Device = C->getDevice();
4552 
4553   OMPLexicalScope Scope(*this, S, OMPD_task);
4554   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4555 }
4556 
4557 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4558     const OMPTargetExitDataDirective &S) {
4559   // If we don't have target devices, don't bother emitting the data mapping
4560   // code.
4561   if (CGM.getLangOpts().OMPTargetTriples.empty())
4562     return;
4563 
4564   // Check if we have any if clause associated with the directive.
4565   const Expr *IfCond = nullptr;
4566   if (auto *C = S.getSingleClause<OMPIfClause>())
4567     IfCond = C->getCondition();
4568 
4569   // Check if we have any device clause associated with the directive.
4570   const Expr *Device = nullptr;
4571   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4572     Device = C->getDevice();
4573 
4574   OMPLexicalScope Scope(*this, S, OMPD_task);
4575   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4576 }
4577 
4578 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4579                                      const OMPTargetParallelDirective &S,
4580                                      PrePostActionTy &Action) {
4581   // Get the captured statement associated with the 'parallel' region.
4582   auto *CS = S.getCapturedStmt(OMPD_parallel);
4583   Action.Enter(CGF);
4584   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4585     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4586     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4587     CGF.EmitOMPPrivateClause(S, PrivateScope);
4588     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4589     (void)PrivateScope.Privatize();
4590     // TODO: Add support for clauses.
4591     CGF.EmitStmt(CS->getCapturedStmt());
4592     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4593   };
4594   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4595                                  emitEmptyBoundParameters);
4596   emitPostUpdateForReductionClause(
4597       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4598 }
4599 
4600 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4601     CodeGenModule &CGM, StringRef ParentName,
4602     const OMPTargetParallelDirective &S) {
4603   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4604     emitTargetParallelRegion(CGF, S, Action);
4605   };
4606   llvm::Function *Fn;
4607   llvm::Constant *Addr;
4608   // Emit target region as a standalone region.
4609   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4610       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4611   assert(Fn && Addr && "Target device function emission failed.");
4612 }
4613 
4614 void CodeGenFunction::EmitOMPTargetParallelDirective(
4615     const OMPTargetParallelDirective &S) {
4616   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4617     emitTargetParallelRegion(CGF, S, Action);
4618   };
4619   emitCommonOMPTargetDirective(*this, S, CodeGen);
4620 }
4621 
4622 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4623                                         const OMPTargetParallelForDirective &S,
4624                                         PrePostActionTy &Action) {
4625   Action.Enter(CGF);
4626   // Emit directive as a combined directive that consists of two implicit
4627   // directives: 'parallel' with 'for' directive.
4628   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4629     CodeGenFunction::OMPCancelStackRAII CancelRegion(
4630         CGF, OMPD_target_parallel_for, S.hasCancel());
4631     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4632                                emitDispatchForLoopBounds);
4633   };
4634   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4635                                  emitEmptyBoundParameters);
4636 }
4637 
4638 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4639     CodeGenModule &CGM, StringRef ParentName,
4640     const OMPTargetParallelForDirective &S) {
4641   // Emit SPMD target parallel for region as a standalone region.
4642   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4643     emitTargetParallelForRegion(CGF, S, Action);
4644   };
4645   llvm::Function *Fn;
4646   llvm::Constant *Addr;
4647   // Emit target region as a standalone region.
4648   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4649       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4650   assert(Fn && Addr && "Target device function emission failed.");
4651 }
4652 
4653 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4654     const OMPTargetParallelForDirective &S) {
4655   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4656     emitTargetParallelForRegion(CGF, S, Action);
4657   };
4658   emitCommonOMPTargetDirective(*this, S, CodeGen);
4659 }
4660 
4661 static void
4662 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4663                                 const OMPTargetParallelForSimdDirective &S,
4664                                 PrePostActionTy &Action) {
4665   Action.Enter(CGF);
4666   // Emit directive as a combined directive that consists of two implicit
4667   // directives: 'parallel' with 'for' directive.
4668   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4669     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4670                                emitDispatchForLoopBounds);
4671   };
4672   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4673                                  emitEmptyBoundParameters);
4674 }
4675 
4676 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4677     CodeGenModule &CGM, StringRef ParentName,
4678     const OMPTargetParallelForSimdDirective &S) {
4679   // Emit SPMD target parallel for region as a standalone region.
4680   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4681     emitTargetParallelForSimdRegion(CGF, S, Action);
4682   };
4683   llvm::Function *Fn;
4684   llvm::Constant *Addr;
4685   // Emit target region as a standalone region.
4686   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4687       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4688   assert(Fn && Addr && "Target device function emission failed.");
4689 }
4690 
4691 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4692     const OMPTargetParallelForSimdDirective &S) {
4693   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4694     emitTargetParallelForSimdRegion(CGF, S, Action);
4695   };
4696   emitCommonOMPTargetDirective(*this, S, CodeGen);
4697 }
4698 
4699 /// Emit a helper variable and return corresponding lvalue.
4700 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4701                      const ImplicitParamDecl *PVD,
4702                      CodeGenFunction::OMPPrivateScope &Privates) {
4703   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4704   Privates.addPrivate(
4705       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4706 }
4707 
4708 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4709   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4710   // Emit outlined function for task construct.
4711   const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
4712   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4713   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4714   const Expr *IfCond = nullptr;
4715   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4716     if (C->getNameModifier() == OMPD_unknown ||
4717         C->getNameModifier() == OMPD_taskloop) {
4718       IfCond = C->getCondition();
4719       break;
4720     }
4721   }
4722 
4723   OMPTaskDataTy Data;
4724   // Check if taskloop must be emitted without taskgroup.
4725   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4726   // TODO: Check if we should emit tied or untied task.
4727   Data.Tied = true;
4728   // Set scheduling for taskloop
4729   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4730     // grainsize clause
4731     Data.Schedule.setInt(/*IntVal=*/false);
4732     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4733   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4734     // num_tasks clause
4735     Data.Schedule.setInt(/*IntVal=*/true);
4736     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4737   }
4738 
4739   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4740     // if (PreCond) {
4741     //   for (IV in 0..LastIteration) BODY;
4742     //   <Final counter/linear vars updates>;
4743     // }
4744     //
4745 
4746     // Emit: if (PreCond) - begin.
4747     // If the condition constant folds and can be elided, avoid emitting the
4748     // whole loop.
4749     bool CondConstant;
4750     llvm::BasicBlock *ContBlock = nullptr;
4751     OMPLoopScope PreInitScope(CGF, S);
4752     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4753       if (!CondConstant)
4754         return;
4755     } else {
4756       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4757       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4758       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4759                   CGF.getProfileCount(&S));
4760       CGF.EmitBlock(ThenBlock);
4761       CGF.incrementProfileCounter(&S);
4762     }
4763 
4764     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4765       CGF.EmitOMPSimdInit(S);
4766 
4767     OMPPrivateScope LoopScope(CGF);
4768     // Emit helper vars inits.
4769     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4770     auto *I = CS->getCapturedDecl()->param_begin();
4771     auto *LBP = std::next(I, LowerBound);
4772     auto *UBP = std::next(I, UpperBound);
4773     auto *STP = std::next(I, Stride);
4774     auto *LIP = std::next(I, LastIter);
4775     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4776              LoopScope);
4777     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4778              LoopScope);
4779     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4780     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4781              LoopScope);
4782     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4783     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4784     (void)LoopScope.Privatize();
4785     // Emit the loop iteration variable.
4786     const Expr *IVExpr = S.getIterationVariable();
4787     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4788     CGF.EmitVarDecl(*IVDecl);
4789     CGF.EmitIgnoredExpr(S.getInit());
4790 
4791     // Emit the iterations count variable.
4792     // If it is not a variable, Sema decided to calculate iterations count on
4793     // each iteration (e.g., it is foldable into a constant).
4794     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4795       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4796       // Emit calculation of the iterations count.
4797       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4798     }
4799 
4800     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4801                          S.getInc(),
4802                          [&S](CodeGenFunction &CGF) {
4803                            CGF.EmitOMPLoopBody(S, JumpDest());
4804                            CGF.EmitStopPoint(&S);
4805                          },
4806                          [](CodeGenFunction &) {});
4807     // Emit: if (PreCond) - end.
4808     if (ContBlock) {
4809       CGF.EmitBranch(ContBlock);
4810       CGF.EmitBlock(ContBlock, true);
4811     }
4812     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4813     if (HasLastprivateClause) {
4814       CGF.EmitOMPLastprivateClauseFinal(
4815           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4816           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4817               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4818               (*LIP)->getType(), S.getLocStart())));
4819     }
4820   };
4821   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4822                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4823                             const OMPTaskDataTy &Data) {
4824     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4825       OMPLoopScope PreInitScope(CGF, S);
4826       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4827                                                   OutlinedFn, SharedsTy,
4828                                                   CapturedStruct, IfCond, Data);
4829     };
4830     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4831                                                     CodeGen);
4832   };
4833   if (Data.Nogroup) {
4834     EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
4835   } else {
4836     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4837         *this,
4838         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4839                                         PrePostActionTy &Action) {
4840           Action.Enter(CGF);
4841           CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
4842                                         Data);
4843         },
4844         S.getLocStart());
4845   }
4846 }
4847 
4848 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4849   EmitOMPTaskLoopBasedDirective(S);
4850 }
4851 
4852 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4853     const OMPTaskLoopSimdDirective &S) {
4854   EmitOMPTaskLoopBasedDirective(S);
4855 }
4856 
4857 // Generate the instructions for '#pragma omp target update' directive.
4858 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4859     const OMPTargetUpdateDirective &S) {
4860   // If we don't have target devices, don't bother emitting the data mapping
4861   // code.
4862   if (CGM.getLangOpts().OMPTargetTriples.empty())
4863     return;
4864 
4865   // Check if we have any if clause associated with the directive.
4866   const Expr *IfCond = nullptr;
4867   if (auto *C = S.getSingleClause<OMPIfClause>())
4868     IfCond = C->getCondition();
4869 
4870   // Check if we have any device clause associated with the directive.
4871   const Expr *Device = nullptr;
4872   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4873     Device = C->getDevice();
4874 
4875   OMPLexicalScope Scope(*this, S, OMPD_task);
4876   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4877 }
4878 
4879 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
4880     const OMPExecutableDirective &D) {
4881   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
4882     return;
4883   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
4884     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
4885       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
4886     } else {
4887       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
4888         for (const auto *E : LD->counters()) {
4889           if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
4890                   cast<DeclRefExpr>(E)->getDecl())) {
4891             // Emit only those that were not explicitly referenced in clauses.
4892             if (!CGF.LocalDeclMap.count(VD))
4893               CGF.EmitVarDecl(*VD);
4894           }
4895         }
4896       }
4897       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
4898     }
4899   };
4900   OMPSimdLexicalScope Scope(*this, D);
4901   CGM.getOpenMPRuntime().emitInlinedDirective(
4902       *this,
4903       isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
4904                                                   : D.getDirectiveKind(),
4905       CodeGen);
4906 }
4907