1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(
57       CodeGenFunction &CGF, const OMPExecutableDirective &S,
58       const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
59       const bool EmitPreInitStmt = true)
60       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
61         InlinedShareds(CGF) {
62     if (EmitPreInitStmt)
63       emitPreInitStmt(CGF, S);
64     if (!CapturedRegion.hasValue())
65       return;
66     assert(S.hasAssociatedStmt() &&
67            "Expected associated statement for inlined directive.");
68     const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
69     for (auto &C : CS->captures()) {
70       if (C.capturesVariable() || C.capturesVariableByCopy()) {
71         auto *VD = C.getCapturedVar();
72         assert(VD == VD->getCanonicalDecl() &&
73                "Canonical decl must be captured.");
74         DeclRefExpr DRE(
75             const_cast<VarDecl *>(VD),
76             isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
77                                        InlinedShareds.isGlobalVarCaptured(VD)),
78             VD->getType().getNonReferenceType(), VK_LValue, SourceLocation());
79         InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
80           return CGF.EmitLValue(&DRE).getAddress();
81         });
82       }
83     }
84     (void)InlinedShareds.Privatize();
85   }
86 };
87 
88 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
89 /// for captured expressions.
90 class OMPParallelScope final : public OMPLexicalScope {
91   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
92     OpenMPDirectiveKind Kind = S.getDirectiveKind();
93     return !(isOpenMPTargetExecutionDirective(Kind) ||
94              isOpenMPLoopBoundSharingDirective(Kind)) &&
95            isOpenMPParallelDirective(Kind);
96   }
97 
98 public:
99   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
100       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
101                         EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
116                         EmitPreInitStmt(S)) {}
117 };
118 
119 /// Private scope for OpenMP loop-based directives, that supports capturing
120 /// of used expression from loop statement.
121 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
122   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
123     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
124     for (auto *E : S.counters()) {
125       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
126       (void)PreCondScope.addPrivate(VD, [&CGF, VD]() {
127         return CGF.CreateMemTemp(VD->getType().getNonReferenceType());
128       });
129     }
130     (void)PreCondScope.Privatize();
131     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
132       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
133         for (const auto *I : PreInits->decls())
134           CGF.EmitVarDecl(cast<VarDecl>(*I));
135       }
136     }
137   }
138 
139 public:
140   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
141       : CodeGenFunction::RunCleanupsScope(CGF) {
142     emitPreInitStmt(CGF, S);
143   }
144 };
145 
146 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
147   CodeGenFunction::OMPPrivateScope InlinedShareds;
148 
149   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
150     return CGF.LambdaCaptureFields.lookup(VD) ||
151            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
152            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
153             cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
154   }
155 
156 public:
157   OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
158       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
159         InlinedShareds(CGF) {
160     for (const auto *C : S.clauses()) {
161       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
162         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
163           for (const auto *I : PreInit->decls()) {
164             if (!I->hasAttr<OMPCaptureNoInitAttr>())
165               CGF.EmitVarDecl(cast<VarDecl>(*I));
166             else {
167               CodeGenFunction::AutoVarEmission Emission =
168                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
169               CGF.EmitAutoVarCleanups(Emission);
170             }
171           }
172         }
173       } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
174         for (const Expr *E : UDP->varlists()) {
175           const Decl *D = cast<DeclRefExpr>(E)->getDecl();
176           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
177             CGF.EmitVarDecl(*OED);
178         }
179       }
180     }
181     if (!isOpenMPSimdDirective(S.getDirectiveKind()))
182       CGF.EmitOMPPrivateClause(S, InlinedShareds);
183     if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
184       if (const Expr *E = TG->getReductionRef())
185         CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
186     }
187     const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
188     while (CS) {
189       for (auto &C : CS->captures()) {
190         if (C.capturesVariable() || C.capturesVariableByCopy()) {
191           auto *VD = C.getCapturedVar();
192           assert(VD == VD->getCanonicalDecl() &&
193                  "Canonical decl must be captured.");
194           DeclRefExpr DRE(const_cast<VarDecl *>(VD),
195                           isCapturedVar(CGF, VD) ||
196                               (CGF.CapturedStmtInfo &&
197                                InlinedShareds.isGlobalVarCaptured(VD)),
198                           VD->getType().getNonReferenceType(), VK_LValue,
199                           SourceLocation());
200           InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
201             return CGF.EmitLValue(&DRE).getAddress();
202           });
203         }
204       }
205       CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
206     }
207     (void)InlinedShareds.Privatize();
208   }
209 };
210 
211 } // namespace
212 
// Forward declaration; the function has internal linkage, so its definition
// must appear later in this file.
// NOTE(review): presumably the shared emission path for target-based
// directives, with \p CodeGen producing the region body - confirm at the
// definition.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);
216 
217 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
218   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
219     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
220       OrigVD = OrigVD->getCanonicalDecl();
221       bool IsCaptured =
222           LambdaCaptureFields.lookup(OrigVD) ||
223           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
224           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
225       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
226                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
227       return EmitLValue(&DRE);
228     }
229   }
230   return EmitLValue(E);
231 }
232 
233 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
234   auto &C = getContext();
235   llvm::Value *Size = nullptr;
236   auto SizeInChars = C.getTypeSizeInChars(Ty);
237   if (SizeInChars.isZero()) {
238     // getTypeSizeInChars() returns 0 for a VLA.
239     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
240       llvm::Value *ArraySize;
241       std::tie(ArraySize, Ty) = getVLASize(VAT);
242       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
243     }
244     SizeInChars = C.getTypeSizeInChars(Ty);
245     if (SizeInChars.isZero())
246       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
247     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
248   } else
249     Size = CGM.getSize(SizeInChars);
250   return Size;
251 }
252 
253 void CodeGenFunction::GenerateOpenMPCapturedVars(
254     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
255   const RecordDecl *RD = S.getCapturedRecordDecl();
256   auto CurField = RD->field_begin();
257   auto CurCap = S.captures().begin();
258   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
259                                                  E = S.capture_init_end();
260        I != E; ++I, ++CurField, ++CurCap) {
261     if (CurField->hasCapturedVLAType()) {
262       auto VAT = CurField->getCapturedVLAType();
263       auto *Val = VLASizeMap[VAT->getSizeExpr()];
264       CapturedVars.push_back(Val);
265     } else if (CurCap->capturesThis())
266       CapturedVars.push_back(CXXThisValue);
267     else if (CurCap->capturesVariableByCopy()) {
268       llvm::Value *CV =
269           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
270 
271       // If the field is not a pointer, we need to save the actual value
272       // and load it as a void pointer.
273       if (!CurField->getType()->isAnyPointerType()) {
274         auto &Ctx = getContext();
275         auto DstAddr = CreateMemTemp(
276             Ctx.getUIntPtrType(),
277             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
278         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
279 
280         auto *SrcAddrVal = EmitScalarConversion(
281             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
282             Ctx.getPointerType(CurField->getType()), SourceLocation());
283         LValue SrcLV =
284             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
285 
286         // Store the value using the source type pointer.
287         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
288 
289         // Load the value using the destination type pointer.
290         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
291       }
292       CapturedVars.push_back(CV);
293     } else {
294       assert(CurCap->capturesVariable() && "Expected capture by reference.");
295       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
296     }
297   }
298 }
299 
300 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
301                                     StringRef Name, LValue AddrLV,
302                                     bool isReferenceType = false) {
303   ASTContext &Ctx = CGF.getContext();
304 
305   auto *CastedPtr = CGF.EmitScalarConversion(
306       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
307       Ctx.getPointerType(DstType), SourceLocation());
308   auto TmpAddr =
309       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
310           .getAddress();
311 
312   // If we are dealing with references we need to return the address of the
313   // reference instead of the reference of the value.
314   if (isReferenceType) {
315     QualType RefType = Ctx.getLValueReferenceType(DstType);
316     auto *RefVal = TmpAddr.getPointer();
317     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
318     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
319     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
320   }
321 
322   return TmpAddr;
323 }
324 
325 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
326   if (T->isLValueReferenceType()) {
327     return C.getLValueReferenceType(
328         getCanonicalParamType(C, T.getNonReferenceType()),
329         /*SpelledAsLValue=*/false);
330   }
331   if (T->isPointerType())
332     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
333   if (auto *A = T->getAsArrayTypeUnsafe()) {
334     if (auto *VLA = dyn_cast<VariableArrayType>(A))
335       return getCanonicalParamType(C, VLA->getElementType());
336     else if (!A->isVariablyModifiedType())
337       return C.getCanonicalType(T);
338   }
339   return C.getCanonicalParamType(T);
340 }
341 
342 namespace {
343   /// Contains required data for proper outlined function codegen.
344   struct FunctionOptions {
345     /// Captured statement for which the function is generated.
346     const CapturedStmt *S = nullptr;
347     /// true if cast to/from  UIntPtr is required for variables captured by
348     /// value.
349     const bool UIntPtrCastRequired = true;
350     /// true if only casted arguments must be registered as local args or VLA
351     /// sizes.
352     const bool RegisterCastedArgsOnly = false;
353     /// Name of the generated function.
354     const StringRef FunctionName;
355     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
356                              bool RegisterCastedArgsOnly,
357                              StringRef FunctionName)
358         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
359           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
360           FunctionName(FunctionName) {}
361   };
362 }
363 
/// Creates the LLVM function for the captured statement described by \p FO and
/// emits its prologue: builds the parameter lists, starts the function in
/// \p CGF and computes, for every captured field, the local address (or VLA
/// size value) the body has to use.
///
/// \param CGF Code generator in which the new function is started.
/// \param Args [out] Receives the parameters of the captured declaration with
/// the context parameter replaced by one parameter per captured field.
/// \param LocalAddrs [out] Maps a parameter to the captured variable (null for
/// 'this') and the address computed for it.
/// \param VLASizes [out] Maps a parameter to the size expression of the
/// captured VLA type and the value loaded for it.
/// \param CXXThisValue [out] Reset to null and set to the loaded 'this' value
/// if 'this' is captured.
/// \param FO Options selecting the captured statement, the function name and
/// how arguments are cast and registered.
/// \returns The newly created function (internal linkage, named
/// FO.FunctionName).
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  // Args holds the parameters as described by the AST; TargetArgs holds the
  // parameters the LLVM function is actually created with. Both start with the
  // parameters that precede the context parameter.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  // When no uintptr casts are required, create a synthetic function
  // declaration that owns the ParmVarDecls created below for captured
  // variables and 'this'.
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
        SourceLocation(), DeclarationName(), Ctx.VoidTy,
        Ctx.getTrivialTypeSourceInfo(
            Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
        SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  // First pass over the captured fields (in step with the captures): create
  // one parameter per field.
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    // Pick the parameter name: the captured variable's identifier, "this", or
    // "vla" for a captured VLA size.
    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      // Use the source locations of the captured variable when there is one,
      // otherwise those of the field.
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getLocStart() : FD->getLocStart(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  // Append the parameters that follow the context parameter.
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  // Second pass over the captured fields: Cnt indexes the parameter created
  // for the current field, starting at the former context parameter position.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      // The parameter was translated by the OpenMP runtime support; ask it for
      // the address to use.
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // Captured VLA size: load its value (through the uintptr cast when one
      // is required) and record it together with the size expression.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), AlignmentSource::Decl);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      // Capture by reference: for a non-reference variable the argument holds
      // a reference or pointer to it, which is loaded to get the variable's
      // address.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        // Register the address with the declared alignment of the variable.
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      // Non-pointer capture by copy: always registered, viewed through the
      // uintptr cast when one is required.
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var,
            FO.UIntPtrCastRequired
                ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
                                       ArgLVal, VarTy->isReferenceType())
                : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
550 
551 llvm::Function *
552 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
553   assert(
554       CapturedStmtInfo &&
555       "CapturedStmtInfo should be set when generating the captured function");
556   const CapturedDecl *CD = S.getCapturedDecl();
557   // Build the argument list.
558   bool NeedWrapperFunction =
559       getDebugInfo() &&
560       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
561   FunctionArgList Args;
562   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
563   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
564   SmallString<256> Buffer;
565   llvm::raw_svector_ostream Out(Buffer);
566   Out << CapturedStmtInfo->getHelperName();
567   if (NeedWrapperFunction)
568     Out << "_debug__";
569   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
570                      Out.str());
571   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
572                                                    VLASizes, CXXThisValue, FO);
573   for (const auto &LocalAddrPair : LocalAddrs) {
574     if (LocalAddrPair.second.first) {
575       setAddrOfLocalVar(LocalAddrPair.second.first,
576                         LocalAddrPair.second.second);
577     }
578   }
579   for (const auto &VLASizePair : VLASizes)
580     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
581   PGO.assignRegionCounters(GlobalDecl(CD), F);
582   CapturedStmtInfo->EmitBody(*this, CD->getBody());
583   FinishFunction(CD->getBodyRBrace());
584   if (!NeedWrapperFunction)
585     return F;
586 
587   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
588                             /*RegisterCastedArgsOnly=*/true,
589                             CapturedStmtInfo->getHelperName());
590   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
591   Args.clear();
592   LocalAddrs.clear();
593   VLASizes.clear();
594   llvm::Function *WrapperF =
595       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
596                                    WrapperCGF.CXXThisValue, WrapperFO);
597   llvm::SmallVector<llvm::Value *, 4> CallArgs;
598   for (const auto *Arg : Args) {
599     llvm::Value *CallArg;
600     auto I = LocalAddrs.find(Arg);
601     if (I != LocalAddrs.end()) {
602       LValue LV = WrapperCGF.MakeAddrLValue(
603           I->second.second,
604           I->second.first ? I->second.first->getType() : Arg->getType(),
605           AlignmentSource::Decl);
606       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
607     } else {
608       auto EI = VLASizes.find(Arg);
609       if (EI != VLASizes.end())
610         CallArg = EI->second.second;
611       else {
612         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
613                                               Arg->getType(),
614                                               AlignmentSource::Decl);
615         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
616       }
617     }
618     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
619   }
620   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
621                                                   F, CallArgs);
622   WrapperCGF.FinishFunction();
623   return WrapperF;
624 }
625 
626 //===----------------------------------------------------------------------===//
627 //                              OpenMP Directive Emission
628 //===----------------------------------------------------------------------===//
629 void CodeGenFunction::EmitOMPAggregateAssign(
630     Address DestAddr, Address SrcAddr, QualType OriginalType,
631     const llvm::function_ref<void(Address, Address)> &CopyGen) {
632   // Perform element-by-element initialization.
633   QualType ElementTy;
634 
635   // Drill down to the base element type on both arrays.
636   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
637   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
638   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
639 
640   auto SrcBegin = SrcAddr.getPointer();
641   auto DestBegin = DestAddr.getPointer();
642   // Cast from pointer to array type to pointer to single element.
643   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
644   // The basic structure here is a while-do loop.
645   auto BodyBB = createBasicBlock("omp.arraycpy.body");
646   auto DoneBB = createBasicBlock("omp.arraycpy.done");
647   auto IsEmpty =
648       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
649   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
650 
651   // Enter the loop body, making that address the current address.
652   auto EntryBB = Builder.GetInsertBlock();
653   EmitBlock(BodyBB);
654 
655   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
656 
657   llvm::PHINode *SrcElementPHI =
658     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
659   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
660   Address SrcElementCurrent =
661       Address(SrcElementPHI,
662               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
663 
664   llvm::PHINode *DestElementPHI =
665     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
666   DestElementPHI->addIncoming(DestBegin, EntryBB);
667   Address DestElementCurrent =
668     Address(DestElementPHI,
669             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
670 
671   // Emit copy.
672   CopyGen(DestElementCurrent, SrcElementCurrent);
673 
674   // Shift the address forward by one element.
675   auto DestElementNext = Builder.CreateConstGEP1_32(
676       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
677   auto SrcElementNext = Builder.CreateConstGEP1_32(
678       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
679   // Check whether we've reached the end.
680   auto Done =
681       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
682   Builder.CreateCondBr(Done, DoneBB, BodyBB);
683   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
684   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
685 
686   // Done.
687   EmitBlock(DoneBB, /*IsFinished=*/true);
688 }
689 
690 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
691                                   Address SrcAddr, const VarDecl *DestVD,
692                                   const VarDecl *SrcVD, const Expr *Copy) {
693   if (OriginalType->isArrayType()) {
694     auto *BO = dyn_cast<BinaryOperator>(Copy);
695     if (BO && BO->getOpcode() == BO_Assign) {
696       // Perform simple memcpy for simple copying.
697       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
698     } else {
699       // For arrays with complex element types perform element by element
700       // copying.
701       EmitOMPAggregateAssign(
702           DestAddr, SrcAddr, OriginalType,
703           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
704             // Working with the single array element, so have to remap
705             // destination and source variables to corresponding array
706             // elements.
707             CodeGenFunction::OMPPrivateScope Remap(*this);
708             Remap.addPrivate(DestVD, [DestElement]() -> Address {
709               return DestElement;
710             });
711             Remap.addPrivate(
712                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
713             (void)Remap.Privatize();
714             EmitIgnoredExpr(Copy);
715           });
716     }
717   } else {
718     // Remap pseudo source variable to private copy.
719     CodeGenFunction::OMPPrivateScope Remap(*this);
720     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
721     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
722     (void)Remap.Privatize();
723     // Emit copying of the whole variable.
724     EmitIgnoredExpr(Copy);
725   }
726 }
727 
/// Emit private copies for the variables listed in the 'firstprivate' clauses
/// of \p D and register them in \p PrivateScope.
///
/// Each variable is handled at most once (keyed on its canonical declaration).
/// A variable is skipped, i.e. no copy is emitted here, when all of the
/// following hold: the directive is not one that forces the copy (see
/// MustEmitFirstprivateCopy below), the variable is not also named in a
/// 'lastprivate' clause, and the current captured-statement info has a
/// non-reference field for it.
///
/// \returns true if at least one variable that was not skipped is also named
/// in a 'lastprivate' clause of \p D; false otherwise, including when there is
/// no insertion point.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Canonical declarations of every variable named in a 'lastprivate' clause
  // of this directive.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    // Note: the loop variable below shadows the directive parameter 'D' for
    // the duration of this inner loop.
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  // Variables already processed (either skipped or emitted), so that a
  // variable repeated across clauses is not handled twice.
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // IRef (original variable references) and InitsRef (init helper
    // variables) are advanced in lock step with the private copies.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // NOTE(review): presumably a non-reference capture field already serves
      // as the private copy, which is why nothing is emitted in this case --
      // confirm against the capture logic.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        // VD is the private copy; VDInit is the helper variable that VD's
        // initializer reads from.
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Build a temporary reference to the original variable and take its
        // address before the private copy is registered for OrigVD.
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              // Otherwise run the initializer once per array element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    // VDInit is mapped to the source element only while the
                    // initializer is emitted, then unmapped again.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
824 
825 void CodeGenFunction::EmitOMPPrivateClause(
826     const OMPExecutableDirective &D,
827     CodeGenFunction::OMPPrivateScope &PrivateScope) {
828   if (!HaveInsertPoint())
829     return;
830   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
831   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
832     auto IRef = C->varlist_begin();
833     for (auto IInit : C->private_copies()) {
834       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
835       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
836         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
837         bool IsRegistered =
838             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
839               // Emit private VarDecl with copy init.
840               EmitDecl(*VD);
841               return GetAddrOfLocalVar(VD);
842             });
843         assert(IsRegistered && "private var already registered as private");
844         // Silence the warning about unused variable.
845         (void)IsRegistered;
846       }
847       ++IRef;
848     }
849   }
850 }
851 
852 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
853   if (!HaveInsertPoint())
854     return false;
855   // threadprivate_var1 = master_threadprivate_var1;
856   // operator=(threadprivate_var2, master_threadprivate_var2);
857   // ...
858   // __kmpc_barrier(&loc, global_tid);
859   llvm::DenseSet<const VarDecl *> CopiedVars;
860   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
861   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
862     auto IRef = C->varlist_begin();
863     auto ISrcRef = C->source_exprs().begin();
864     auto IDestRef = C->destination_exprs().begin();
865     for (auto *AssignOp : C->assignment_ops()) {
866       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
867       QualType Type = VD->getType();
868       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
869         // Get the address of the master variable. If we are emitting code with
870         // TLS support, the address is passed from the master as field in the
871         // captured declaration.
872         Address MasterAddr = Address::invalid();
873         if (getLangOpts().OpenMPUseTLS &&
874             getContext().getTargetInfo().isTLSSupported()) {
875           assert(CapturedStmtInfo->lookup(VD) &&
876                  "Copyin threadprivates should have been captured!");
877           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
878                           VK_LValue, (*IRef)->getExprLoc());
879           MasterAddr = EmitLValue(&DRE).getAddress();
880           LocalDeclMap.erase(VD);
881         } else {
882           MasterAddr =
883             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
884                                         : CGM.GetAddrOfGlobal(VD),
885                     getContext().getDeclAlign(VD));
886         }
887         // Get the address of the threadprivate variable.
888         Address PrivateAddr = EmitLValue(*IRef).getAddress();
889         if (CopiedVars.size() == 1) {
890           // At first check if current thread is a master thread. If it is, no
891           // need to copy data.
892           CopyBegin = createBasicBlock("copyin.not.master");
893           CopyEnd = createBasicBlock("copyin.not.master.end");
894           Builder.CreateCondBr(
895               Builder.CreateICmpNE(
896                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
897                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
898               CopyBegin, CopyEnd);
899           EmitBlock(CopyBegin);
900         }
901         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
902         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
903         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
904       }
905       ++IRef;
906       ++ISrcRef;
907       ++IDestRef;
908     }
909   }
910   if (CopyEnd) {
911     // Exit out of copying procedure for non-master thread.
912     EmitBlock(CopyEnd, /*IsFinished=*/true);
913     return true;
914   }
915   return false;
916 }
917 
918 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
919     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
920   if (!HaveInsertPoint())
921     return false;
922   bool HasAtLeastOneLastprivate = false;
923   llvm::DenseSet<const VarDecl *> SIMDLCVs;
924   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
925     auto *LoopDirective = cast<OMPLoopDirective>(&D);
926     for (auto *C : LoopDirective->counters()) {
927       SIMDLCVs.insert(
928           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
929     }
930   }
931   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
932   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
933     HasAtLeastOneLastprivate = true;
934     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
935         !getLangOpts().OpenMPSimd)
936       break;
937     auto IRef = C->varlist_begin();
938     auto IDestRef = C->destination_exprs().begin();
939     for (auto *IInit : C->private_copies()) {
940       // Keep the address of the original variable for future update at the end
941       // of the loop.
942       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
943       // Taskloops do not require additional initialization, it is done in
944       // runtime support library.
945       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
946         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
947         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
948           DeclRefExpr DRE(
949               const_cast<VarDecl *>(OrigVD),
950               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
951                   OrigVD) != nullptr,
952               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
953           return EmitLValue(&DRE).getAddress();
954         });
955         // Check if the variable is also a firstprivate: in this case IInit is
956         // not generated. Initialization of this variable will happen in codegen
957         // for 'firstprivate' clause.
958         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
959           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
960           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
961             // Emit private VarDecl with copy init.
962             EmitDecl(*VD);
963             return GetAddrOfLocalVar(VD);
964           });
965           assert(IsRegistered &&
966                  "lastprivate var already registered as private");
967           (void)IsRegistered;
968         }
969       }
970       ++IRef;
971       ++IDestRef;
972     }
973   }
974   return HasAtLeastOneLastprivate;
975 }
976 
977 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
978     const OMPExecutableDirective &D, bool NoFinals,
979     llvm::Value *IsLastIterCond) {
980   if (!HaveInsertPoint())
981     return;
982   // Emit following code:
983   // if (<IsLastIterCond>) {
984   //   orig_var1 = private_orig_var1;
985   //   ...
986   //   orig_varn = private_orig_varn;
987   // }
988   llvm::BasicBlock *ThenBB = nullptr;
989   llvm::BasicBlock *DoneBB = nullptr;
990   if (IsLastIterCond) {
991     ThenBB = createBasicBlock(".omp.lastprivate.then");
992     DoneBB = createBasicBlock(".omp.lastprivate.done");
993     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
994     EmitBlock(ThenBB);
995   }
996   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
997   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
998   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
999     auto IC = LoopDirective->counters().begin();
1000     for (auto F : LoopDirective->finals()) {
1001       auto *D =
1002           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1003       if (NoFinals)
1004         AlreadyEmittedVars.insert(D);
1005       else
1006         LoopCountersAndUpdates[D] = F;
1007       ++IC;
1008     }
1009   }
1010   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1011     auto IRef = C->varlist_begin();
1012     auto ISrcRef = C->source_exprs().begin();
1013     auto IDestRef = C->destination_exprs().begin();
1014     for (auto *AssignOp : C->assignment_ops()) {
1015       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1016       QualType Type = PrivateVD->getType();
1017       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1018       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1019         // If lastprivate variable is a loop control variable for loop-based
1020         // directive, update its value before copyin back to original
1021         // variable.
1022         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1023           EmitIgnoredExpr(FinalExpr);
1024         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1025         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1026         // Get the address of the original variable.
1027         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1028         // Get the address of the private variable.
1029         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1030         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1031           PrivateAddr =
1032               Address(Builder.CreateLoad(PrivateAddr),
1033                       getNaturalTypeAlignment(RefTy->getPointeeType()));
1034         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1035       }
1036       ++IRef;
1037       ++ISrcRef;
1038       ++IDestRef;
1039     }
1040     if (auto *PostUpdate = C->getPostUpdateExpr())
1041       EmitIgnoredExpr(PostUpdate);
1042   }
1043   if (IsLastIterCond)
1044     EmitBlock(DoneBB, /*IsFinished=*/true);
1045 }
1046 
/// Emit the private reduction variables for the 'reduction' clauses of \p D
/// and register them, together with the LHS/RHS helper variables used by the
/// reduction operations, in \p PrivateScope.
///
/// The function first gathers the items of all reduction clauses into flat
/// lists and then, for each item, emits the private variable with its
/// reduction initializer and maps the helper variables: LHS to the original
/// (shared) item and RHS to the private copy.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Parallel lists describing every reduction item of every clause.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  // Count is the index of the current item inside RedCG; ILHS, IRHS and IPriv
  // are advanced together with it at the end of each iteration.
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    // The callback passed below emits the private variable's own initializer
    // and returns true.
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    // Register the (possibly adjusted) private address for the item's base
    // declaration.
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    // Map the LHS/RHS helper variables. Three cases are distinguished by the
    // form of the reduction item and the type of its private copy.
    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Array section with a variably modified private type: both helpers
      // are mapped without any cast.
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Scalar array section or array subscript: the private address is cast
      // to the memory type of the RHS helper variable.
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      // Every other item. For array types both addresses are cast to the
      // memory types of the helper variables; otherwise they are used as is.
      // Note: this redeclares 'Type' with the same value as the outer one.
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1152 
1153 void CodeGenFunction::EmitOMPReductionClauseFinal(
1154     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1155   if (!HaveInsertPoint())
1156     return;
1157   llvm::SmallVector<const Expr *, 8> Privates;
1158   llvm::SmallVector<const Expr *, 8> LHSExprs;
1159   llvm::SmallVector<const Expr *, 8> RHSExprs;
1160   llvm::SmallVector<const Expr *, 8> ReductionOps;
1161   bool HasAtLeastOneReduction = false;
1162   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1163     HasAtLeastOneReduction = true;
1164     Privates.append(C->privates().begin(), C->privates().end());
1165     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1166     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1167     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1168   }
1169   if (HasAtLeastOneReduction) {
1170     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1171                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1172                       ReductionKind == OMPD_simd;
1173     bool SimpleReduction = ReductionKind == OMPD_simd;
1174     // Emit nowait reduction if nowait clause is present or directive is a
1175     // parallel directive (it always has implicit barrier).
1176     CGM.getOpenMPRuntime().emitReduction(
1177         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1178         {WithNowait, SimpleReduction, ReductionKind});
1179   }
1180 }
1181 
1182 static void emitPostUpdateForReductionClause(
1183     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1184     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1185   if (!CGF.HaveInsertPoint())
1186     return;
1187   llvm::BasicBlock *DoneBB = nullptr;
1188   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1189     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1190       if (!DoneBB) {
1191         if (auto *Cond = CondGen(CGF)) {
1192           // If the first post-update expression is found, emit conditional
1193           // block if it was requested.
1194           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1195           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1196           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1197           CGF.EmitBlock(ThenBB);
1198         }
1199       }
1200       CGF.EmitIgnoredExpr(PostUpdate);
1201     }
1202   }
1203   if (DoneBB)
1204     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1205 }
1206 
1207 namespace {
1208 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1209 /// parallel function. This is necessary for combined constructs such as
1210 /// 'distribute parallel for'
1211 typedef llvm::function_ref<void(CodeGenFunction &,
1212                                 const OMPExecutableDirective &,
1213                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1214     CodeGenBoundParametersTy;
1215 } // anonymous namespace
1216 
1217 static void emitCommonOMPParallelDirective(
1218     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1219     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1220     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1221   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1222   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1223       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1224   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1225     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1226     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1227                                          /*IgnoreResultAssign*/ true);
1228     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1229         CGF, NumThreads, NumThreadsClause->getLocStart());
1230   }
1231   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1232     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1233     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1234         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1235   }
1236   const Expr *IfCond = nullptr;
1237   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1238     if (C->getNameModifier() == OMPD_unknown ||
1239         C->getNameModifier() == OMPD_parallel) {
1240       IfCond = C->getCondition();
1241       break;
1242     }
1243   }
1244 
1245   OMPParallelScope Scope(CGF, S);
1246   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1247   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1248   // lower and upper bounds with the pragma 'for' chunking mechanism.
1249   // The following lambda takes care of appending the lower and upper bound
1250   // parameters when necessary
1251   CodeGenBoundParameters(CGF, S, CapturedVars);
1252   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1253   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1254                                               CapturedVars, IfCond);
1255 }
1256 
1257 static void emitEmptyBoundParameters(CodeGenFunction &,
1258                                      const OMPExecutableDirective &,
1259                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1260 
1261 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1262   // Emit parallel region as a standalone region.
1263   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1264     OMPPrivateScope PrivateScope(CGF);
1265     bool Copyins = CGF.EmitOMPCopyinClause(S);
1266     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1267     if (Copyins) {
1268       // Emit implicit barrier to synchronize threads and avoid data races on
1269       // propagation master's thread values of threadprivate variables to local
1270       // instances of that variables of all other implicit threads.
1271       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1272           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1273           /*ForceSimpleCall=*/true);
1274     }
1275     CGF.EmitOMPPrivateClause(S, PrivateScope);
1276     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1277     (void)PrivateScope.Privatize();
1278     CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1279     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1280   };
1281   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1282                                  emitEmptyBoundParameters);
1283   emitPostUpdateForReductionClause(
1284       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1285 }
1286 
1287 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1288                                       JumpDest LoopExit) {
1289   RunCleanupsScope BodyScope(*this);
1290   // Update counters values on current iteration.
1291   for (auto I : D.updates()) {
1292     EmitIgnoredExpr(I);
1293   }
1294   // Update the linear variables.
1295   // In distribute directives only loop counters may be marked as linear, no
1296   // need to generate the code for them.
1297   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1298     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1299       for (auto *U : C->updates())
1300         EmitIgnoredExpr(U);
1301     }
1302   }
1303 
1304   // On a continue in the body, jump to the end.
1305   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1306   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1307   // Emit loop body.
1308   EmitStmt(D.getBody());
1309   // The end (updates/cleanups).
1310   EmitBlock(Continue.getBlock());
1311   BreakContinueStack.pop_back();
1312 }
1313 
1314 void CodeGenFunction::EmitOMPInnerLoop(
1315     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1316     const Expr *IncExpr,
1317     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1318     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1319   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1320 
1321   // Start the loop with a block that tests the condition.
1322   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1323   EmitBlock(CondBlock);
1324   const SourceRange &R = S.getSourceRange();
1325   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1326                  SourceLocToDebugLoc(R.getEnd()));
1327 
1328   // If there are any cleanups between here and the loop-exit scope,
1329   // create a block to stage a loop exit along.
1330   auto ExitBlock = LoopExit.getBlock();
1331   if (RequiresCleanup)
1332     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1333 
1334   auto LoopBody = createBasicBlock("omp.inner.for.body");
1335 
1336   // Emit condition.
1337   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1338   if (ExitBlock != LoopExit.getBlock()) {
1339     EmitBlock(ExitBlock);
1340     EmitBranchThroughCleanup(LoopExit);
1341   }
1342 
1343   EmitBlock(LoopBody);
1344   incrementProfileCounter(&S);
1345 
1346   // Create a block for the increment.
1347   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1348   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1349 
1350   BodyGen(*this);
1351 
1352   // Emit "IV = IV + 1" and a back-edge to the condition block.
1353   EmitBlock(Continue.getBlock());
1354   EmitIgnoredExpr(IncExpr);
1355   PostIncGen(*this);
1356   BreakContinueStack.pop_back();
1357   EmitBranch(CondBlock);
1358   LoopStack.pop();
1359   // Emit the fall-through block.
1360   EmitBlock(LoopExit.getBlock());
1361 }
1362 
1363 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1364   if (!HaveInsertPoint())
1365     return false;
1366   // Emit inits for the linear variables.
1367   bool HasLinears = false;
1368   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1369     for (auto *Init : C->inits()) {
1370       HasLinears = true;
1371       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1372       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1373         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1374         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1375         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1376                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1377                         VD->getInit()->getType(), VK_LValue,
1378                         VD->getInit()->getExprLoc());
1379         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1380                                                 VD->getType()),
1381                        /*capturedByInit=*/false);
1382         EmitAutoVarCleanups(Emission);
1383       } else
1384         EmitVarDecl(*VD);
1385     }
1386     // Emit the linear steps for the linear clauses.
1387     // If a step is not constant, it is pre-calculated before the loop.
1388     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1389       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1390         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1391         // Emit calculation of the linear step.
1392         EmitIgnoredExpr(CS);
1393       }
1394   }
1395   return HasLinears;
1396 }
1397 
1398 void CodeGenFunction::EmitOMPLinearClauseFinal(
1399     const OMPLoopDirective &D,
1400     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1401   if (!HaveInsertPoint())
1402     return;
1403   llvm::BasicBlock *DoneBB = nullptr;
1404   // Emit the final values of the linear variables.
1405   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1406     auto IC = C->varlist_begin();
1407     for (auto *F : C->finals()) {
1408       if (!DoneBB) {
1409         if (auto *Cond = CondGen(*this)) {
1410           // If the first post-update expression is found, emit conditional
1411           // block if it was requested.
1412           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1413           DoneBB = createBasicBlock(".omp.linear.pu.done");
1414           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1415           EmitBlock(ThenBB);
1416         }
1417       }
1418       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1419       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1420                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1421                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1422       Address OrigAddr = EmitLValue(&DRE).getAddress();
1423       CodeGenFunction::OMPPrivateScope VarScope(*this);
1424       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1425       (void)VarScope.Privatize();
1426       EmitIgnoredExpr(F);
1427       ++IC;
1428     }
1429     if (auto *PostUpdate = C->getPostUpdateExpr())
1430       EmitIgnoredExpr(PostUpdate);
1431   }
1432   if (DoneBB)
1433     EmitBlock(DoneBB, /*IsFinished=*/true);
1434 }
1435 
1436 static void emitAlignedClause(CodeGenFunction &CGF,
1437                               const OMPExecutableDirective &D) {
1438   if (!CGF.HaveInsertPoint())
1439     return;
1440   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1441     unsigned ClauseAlignment = 0;
1442     if (auto AlignmentExpr = Clause->getAlignment()) {
1443       auto AlignmentCI =
1444           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1445       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1446     }
1447     for (auto E : Clause->varlists()) {
1448       unsigned Alignment = ClauseAlignment;
1449       if (Alignment == 0) {
1450         // OpenMP [2.8.1, Description]
1451         // If no optional parameter is specified, implementation-defined default
1452         // alignments for SIMD instructions on the target platforms are assumed.
1453         Alignment =
1454             CGF.getContext()
1455                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1456                     E->getType()->getPointeeType()))
1457                 .getQuantity();
1458       }
1459       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1460              "alignment is not power of 2");
1461       if (Alignment != 0) {
1462         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1463         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1464       }
1465     }
1466   }
1467 }
1468 
1469 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1470     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1471   if (!HaveInsertPoint())
1472     return;
1473   auto I = S.private_counters().begin();
1474   for (auto *E : S.counters()) {
1475     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1476     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1477     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1478       // Emit var without initialization.
1479       if (!LocalDeclMap.count(PrivateVD)) {
1480         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1481         EmitAutoVarCleanups(VarEmission);
1482       }
1483       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1484                       /*RefersToEnclosingVariableOrCapture=*/false,
1485                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1486       return EmitLValue(&DRE).getAddress();
1487     });
1488     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1489         VD->hasGlobalStorage()) {
1490       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1491         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1492                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1493                         E->getType(), VK_LValue, E->getExprLoc());
1494         return EmitLValue(&DRE).getAddress();
1495       });
1496     }
1497     ++I;
1498   }
1499 }
1500 
1501 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1502                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1503                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1504   if (!CGF.HaveInsertPoint())
1505     return;
1506   {
1507     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1508     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1509     (void)PreCondScope.Privatize();
1510     // Get initial values of real counters.
1511     for (auto I : S.inits()) {
1512       CGF.EmitIgnoredExpr(I);
1513     }
1514   }
1515   // Check that loop is executed at least one time.
1516   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1517 }
1518 
1519 void CodeGenFunction::EmitOMPLinearClause(
1520     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1521   if (!HaveInsertPoint())
1522     return;
1523   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1524   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1525     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1526     for (auto *C : LoopDirective->counters()) {
1527       SIMDLCVs.insert(
1528           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1529     }
1530   }
1531   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1532     auto CurPrivate = C->privates().begin();
1533     for (auto *E : C->varlists()) {
1534       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1535       auto *PrivateVD =
1536           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1537       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1538         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1539           // Emit private VarDecl with copy init.
1540           EmitVarDecl(*PrivateVD);
1541           return GetAddrOfLocalVar(PrivateVD);
1542         });
1543         assert(IsRegistered && "linear var already registered as private");
1544         // Silence the warning about unused variable.
1545         (void)IsRegistered;
1546       } else
1547         EmitVarDecl(*PrivateVD);
1548       ++CurPrivate;
1549     }
1550   }
1551 }
1552 
1553 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1554                                      const OMPExecutableDirective &D,
1555                                      bool IsMonotonic) {
1556   if (!CGF.HaveInsertPoint())
1557     return;
1558   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1559     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1560                                  /*ignoreResult=*/true);
1561     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1562     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1563     // In presence of finite 'safelen', it may be unsafe to mark all
1564     // the memory instructions parallel, because loop-carried
1565     // dependences of 'safelen' iterations are possible.
1566     if (!IsMonotonic)
1567       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1568   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1569     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1570                                  /*ignoreResult=*/true);
1571     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1572     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1573     // In presence of finite 'safelen', it may be unsafe to mark all
1574     // the memory instructions parallel, because loop-carried
1575     // dependences of 'safelen' iterations are possible.
1576     CGF.LoopStack.setParallel(false);
1577   }
1578 }
1579 
1580 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1581                                       bool IsMonotonic) {
1582   // Walk clauses and process safelen/lastprivate.
1583   LoopStack.setParallel(!IsMonotonic);
1584   LoopStack.setVectorizeEnable(true);
1585   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1586 }
1587 
1588 void CodeGenFunction::EmitOMPSimdFinal(
1589     const OMPLoopDirective &D,
1590     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1591   if (!HaveInsertPoint())
1592     return;
1593   llvm::BasicBlock *DoneBB = nullptr;
1594   auto IC = D.counters().begin();
1595   auto IPC = D.private_counters().begin();
1596   for (auto F : D.finals()) {
1597     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1598     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1599     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1600     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1601         OrigVD->hasGlobalStorage() || CED) {
1602       if (!DoneBB) {
1603         if (auto *Cond = CondGen(*this)) {
1604           // If the first post-update expression is found, emit conditional
1605           // block if it was requested.
1606           auto *ThenBB = createBasicBlock(".omp.final.then");
1607           DoneBB = createBasicBlock(".omp.final.done");
1608           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1609           EmitBlock(ThenBB);
1610         }
1611       }
1612       Address OrigAddr = Address::invalid();
1613       if (CED)
1614         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1615       else {
1616         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1617                         /*RefersToEnclosingVariableOrCapture=*/false,
1618                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1619         OrigAddr = EmitLValue(&DRE).getAddress();
1620       }
1621       OMPPrivateScope VarScope(*this);
1622       VarScope.addPrivate(OrigVD,
1623                           [OrigAddr]() -> Address { return OrigAddr; });
1624       (void)VarScope.Privatize();
1625       EmitIgnoredExpr(F);
1626     }
1627     ++IC;
1628     ++IPC;
1629   }
1630   if (DoneBB)
1631     EmitBlock(DoneBB, /*IsFinished=*/true);
1632 }
1633 
1634 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1635                                          const OMPLoopDirective &S,
1636                                          CodeGenFunction::JumpDest LoopExit) {
1637   CGF.EmitOMPLoopBody(S, LoopExit);
1638   CGF.EmitStopPoint(&S);
1639 }
1640 
1641 /// Emit a helper variable and return corresponding lvalue.
1642 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1643                                const DeclRefExpr *Helper) {
1644   auto VDecl = cast<VarDecl>(Helper->getDecl());
1645   CGF.EmitVarDecl(*VDecl);
1646   return CGF.EmitLValue(Helper);
1647 }
1648 
1649 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
1650                               PrePostActionTy &Action) {
1651   Action.Enter(CGF);
1652   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
1653          "Expected simd directive");
1654   OMPLoopScope PreInitScope(CGF, S);
1655   // if (PreCond) {
1656   //   for (IV in 0..LastIteration) BODY;
1657   //   <Final counter/linear vars updates>;
1658   // }
1659   //
1660   if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
1661       isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
1662       isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
1663     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
1664     (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
1665   }
1666 
1667   // Emit: if (PreCond) - begin.
1668   // If the condition constant folds and can be elided, avoid emitting the
1669   // whole loop.
1670   bool CondConstant;
1671   llvm::BasicBlock *ContBlock = nullptr;
1672   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1673     if (!CondConstant)
1674       return;
1675   } else {
1676     auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1677     ContBlock = CGF.createBasicBlock("simd.if.end");
1678     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1679                 CGF.getProfileCount(&S));
1680     CGF.EmitBlock(ThenBlock);
1681     CGF.incrementProfileCounter(&S);
1682   }
1683 
1684   // Emit the loop iteration variable.
1685   const Expr *IVExpr = S.getIterationVariable();
1686   const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1687   CGF.EmitVarDecl(*IVDecl);
1688   CGF.EmitIgnoredExpr(S.getInit());
1689 
1690   // Emit the iterations count variable.
1691   // If it is not a variable, Sema decided to calculate iterations count on
1692   // each iteration (e.g., it is foldable into a constant).
1693   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1694     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1695     // Emit calculation of the iterations count.
1696     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1697   }
1698 
1699   CGF.EmitOMPSimdInit(S);
1700 
1701   emitAlignedClause(CGF, S);
1702   (void)CGF.EmitOMPLinearClauseInit(S);
1703   {
1704     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
1705     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1706     CGF.EmitOMPLinearClause(S, LoopScope);
1707     CGF.EmitOMPPrivateClause(S, LoopScope);
1708     CGF.EmitOMPReductionClauseInit(S, LoopScope);
1709     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1710     (void)LoopScope.Privatize();
1711     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1712                          S.getInc(),
1713                          [&S](CodeGenFunction &CGF) {
1714                            CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
1715                            CGF.EmitStopPoint(&S);
1716                          },
1717                          [](CodeGenFunction &) {});
1718     CGF.EmitOMPSimdFinal(
1719         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1720     // Emit final copy of the lastprivate variables at the end of loops.
1721     if (HasLastprivateClause)
1722       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1723     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1724     emitPostUpdateForReductionClause(
1725         CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1726   }
1727   CGF.EmitOMPLinearClauseFinal(
1728       S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1729   // Emit: if (PreCond) - end.
1730   if (ContBlock) {
1731     CGF.EmitBranch(ContBlock);
1732     CGF.EmitBlock(ContBlock, true);
1733   }
1734 }
1735 
1736 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1737   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1738     emitOMPSimdRegion(CGF, S, Action);
1739   };
1740   OMPLexicalScope Scope(*this, S, OMPD_unknown);
1741   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1742 }
1743 
1744 void CodeGenFunction::EmitOMPOuterLoop(
1745     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1746     CodeGenFunction::OMPPrivateScope &LoopScope,
1747     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1748     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1749     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1750   auto &RT = CGM.getOpenMPRuntime();
1751 
1752   const Expr *IVExpr = S.getIterationVariable();
1753   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1754   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1755 
1756   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1757 
1758   // Start the loop with a block that tests the condition.
1759   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1760   EmitBlock(CondBlock);
1761   const SourceRange &R = S.getSourceRange();
1762   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1763                  SourceLocToDebugLoc(R.getEnd()));
1764 
1765   llvm::Value *BoolCondVal = nullptr;
1766   if (!DynamicOrOrdered) {
1767     // UB = min(UB, GlobalUB) or
1768     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1769     // 'distribute parallel for')
1770     EmitIgnoredExpr(LoopArgs.EUB);
1771     // IV = LB
1772     EmitIgnoredExpr(LoopArgs.Init);
1773     // IV < UB
1774     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1775   } else {
1776     BoolCondVal =
1777         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1778                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1779   }
1780 
1781   // If there are any cleanups between here and the loop-exit scope,
1782   // create a block to stage a loop exit along.
1783   auto ExitBlock = LoopExit.getBlock();
1784   if (LoopScope.requiresCleanups())
1785     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1786 
1787   auto LoopBody = createBasicBlock("omp.dispatch.body");
1788   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1789   if (ExitBlock != LoopExit.getBlock()) {
1790     EmitBlock(ExitBlock);
1791     EmitBranchThroughCleanup(LoopExit);
1792   }
1793   EmitBlock(LoopBody);
1794 
1795   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1796   // LB for loop condition and emitted it above).
1797   if (DynamicOrOrdered)
1798     EmitIgnoredExpr(LoopArgs.Init);
1799 
1800   // Create a block for the increment.
1801   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1802   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1803 
1804   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1805   // with dynamic/guided scheduling and without ordered clause.
1806   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1807     LoopStack.setParallel(!IsMonotonic);
1808   else
1809     EmitOMPSimdInit(S, IsMonotonic);
1810 
1811   SourceLocation Loc = S.getLocStart();
1812 
1813   // when 'distribute' is not combined with a 'for':
1814   // while (idx <= UB) { BODY; ++idx; }
1815   // when 'distribute' is combined with a 'for'
1816   // (e.g. 'distribute parallel for')
1817   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1818   EmitOMPInnerLoop(
1819       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1820       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1821         CodeGenLoop(CGF, S, LoopExit);
1822       },
1823       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1824         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1825       });
1826 
1827   EmitBlock(Continue.getBlock());
1828   BreakContinueStack.pop_back();
1829   if (!DynamicOrOrdered) {
1830     // Emit "LB = LB + Stride", "UB = UB + Stride".
1831     EmitIgnoredExpr(LoopArgs.NextLB);
1832     EmitIgnoredExpr(LoopArgs.NextUB);
1833   }
1834 
1835   EmitBranch(CondBlock);
1836   LoopStack.pop();
1837   // Emit the fall-through block.
1838   EmitBlock(LoopExit.getBlock());
1839 
1840   // Tell the runtime we are done.
1841   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1842     if (!DynamicOrOrdered)
1843       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1844                                                      S.getDirectiveKind());
1845   };
1846   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1847 }
1848 
1849 void CodeGenFunction::EmitOMPForOuterLoop(
1850     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1851     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1852     const OMPLoopArguments &LoopArgs,
1853     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1854   auto &RT = CGM.getOpenMPRuntime();
1855 
1856   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1857   const bool DynamicOrOrdered =
1858       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1859 
1860   assert((Ordered ||
1861           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1862                                  LoopArgs.Chunk != nullptr)) &&
1863          "static non-chunked schedule does not need outer loop");
1864 
1865   // Emit outer loop.
1866   //
1867   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1868   // When schedule(dynamic,chunk_size) is specified, the iterations are
1869   // distributed to threads in the team in chunks as the threads request them.
1870   // Each thread executes a chunk of iterations, then requests another chunk,
1871   // until no chunks remain to be distributed. Each chunk contains chunk_size
1872   // iterations, except for the last chunk to be distributed, which may have
1873   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1874   //
1875   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1876   // to threads in the team in chunks as the executing threads request them.
1877   // Each thread executes a chunk of iterations, then requests another chunk,
1878   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1879   // each chunk is proportional to the number of unassigned iterations divided
1880   // by the number of threads in the team, decreasing to 1. For a chunk_size
1881   // with value k (greater than 1), the size of each chunk is determined in the
1882   // same way, with the restriction that the chunks do not contain fewer than k
1883   // iterations (except for the last chunk to be assigned, which may have fewer
1884   // than k iterations).
1885   //
1886   // When schedule(auto) is specified, the decision regarding scheduling is
1887   // delegated to the compiler and/or runtime system. The programmer gives the
1888   // implementation the freedom to choose any possible mapping of iterations to
1889   // threads in the team.
1890   //
1891   // When schedule(runtime) is specified, the decision regarding scheduling is
1892   // deferred until run time, and the schedule and chunk size are taken from the
1893   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1894   // implementation defined
1895   //
1896   // while(__kmpc_dispatch_next(&LB, &UB)) {
1897   //   idx = LB;
1898   //   while (idx <= UB) { BODY; ++idx;
1899   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1900   //   } // inner loop
1901   // }
1902   //
1903   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1904   // When schedule(static, chunk_size) is specified, iterations are divided into
1905   // chunks of size chunk_size, and the chunks are assigned to the threads in
1906   // the team in a round-robin fashion in the order of the thread number.
1907   //
1908   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1909   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1910   //   LB = LB + ST;
1911   //   UB = UB + ST;
1912   // }
1913   //
1914 
1915   const Expr *IVExpr = S.getIterationVariable();
1916   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1917   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1918 
1919   if (DynamicOrOrdered) {
1920     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1921     llvm::Value *LBVal = DispatchBounds.first;
1922     llvm::Value *UBVal = DispatchBounds.second;
1923     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1924                                                              LoopArgs.Chunk};
1925     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1926                            IVSigned, Ordered, DipatchRTInputValues);
1927   } else {
1928     CGOpenMPRuntime::StaticRTInput StaticInit(
1929         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1930         LoopArgs.ST, LoopArgs.Chunk);
1931     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1932                          ScheduleKind, StaticInit);
1933   }
1934 
1935   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1936                                     const unsigned IVSize,
1937                                     const bool IVSigned) {
1938     if (Ordered) {
1939       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1940                                                             IVSigned);
1941     }
1942   };
1943 
1944   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1945                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1946   OuterLoopArgs.IncExpr = S.getInc();
1947   OuterLoopArgs.Init = S.getInit();
1948   OuterLoopArgs.Cond = S.getCond();
1949   OuterLoopArgs.NextLB = S.getNextLowerBound();
1950   OuterLoopArgs.NextUB = S.getNextUpperBound();
1951   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1952                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1953 }
1954 
1955 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1956                              const unsigned IVSize, const bool IVSigned) {}
1957 
1958 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1959     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1960     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1961     const CodeGenLoopTy &CodeGenLoopContent) {
1962 
1963   auto &RT = CGM.getOpenMPRuntime();
1964 
1965   // Emit outer loop.
1966   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1967   // dynamic
1968   //
1969 
1970   const Expr *IVExpr = S.getIterationVariable();
1971   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1972   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1973 
1974   CGOpenMPRuntime::StaticRTInput StaticInit(
1975       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1976       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1977   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1978 
1979   // for combined 'distribute' and 'for' the increment expression of distribute
1980   // is store in DistInc. For 'distribute' alone, it is in Inc.
1981   Expr *IncExpr;
1982   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1983     IncExpr = S.getDistInc();
1984   else
1985     IncExpr = S.getInc();
1986 
1987   // this routine is shared by 'omp distribute parallel for' and
1988   // 'omp distribute': select the right EUB expression depending on the
1989   // directive
1990   OMPLoopArguments OuterLoopArgs;
1991   OuterLoopArgs.LB = LoopArgs.LB;
1992   OuterLoopArgs.UB = LoopArgs.UB;
1993   OuterLoopArgs.ST = LoopArgs.ST;
1994   OuterLoopArgs.IL = LoopArgs.IL;
1995   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1996   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1997                           ? S.getCombinedEnsureUpperBound()
1998                           : S.getEnsureUpperBound();
1999   OuterLoopArgs.IncExpr = IncExpr;
2000   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2001                            ? S.getCombinedInit()
2002                            : S.getInit();
2003   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2004                            ? S.getCombinedCond()
2005                            : S.getCond();
2006   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2007                              ? S.getCombinedNextLowerBound()
2008                              : S.getNextLowerBound();
2009   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2010                              ? S.getCombinedNextUpperBound()
2011                              : S.getNextUpperBound();
2012 
2013   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2014                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2015                    emitEmptyOrdered);
2016 }
2017 
2018 static std::pair<LValue, LValue>
2019 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2020                                      const OMPExecutableDirective &S) {
2021   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2022   LValue LB =
2023       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2024   LValue UB =
2025       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2026 
2027   // When composing 'distribute' with 'for' (e.g. as in 'distribute
2028   // parallel for') we need to use the 'distribute'
2029   // chunk lower and upper bounds rather than the whole loop iteration
2030   // space. These are parameters to the outlined function for 'parallel'
2031   // and we copy the bounds of the previous schedule into the
2032   // the current ones.
2033   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2034   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2035   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
2036   PrevLBVal = CGF.EmitScalarConversion(
2037       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2038       LS.getIterationVariable()->getType(), SourceLocation());
2039   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
2040   PrevUBVal = CGF.EmitScalarConversion(
2041       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2042       LS.getIterationVariable()->getType(), SourceLocation());
2043 
2044   CGF.EmitStoreOfScalar(PrevLBVal, LB);
2045   CGF.EmitStoreOfScalar(PrevUBVal, UB);
2046 
2047   return {LB, UB};
2048 }
2049 
2050 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2051 /// we need to use the LB and UB expressions generated by the worksharing
2052 /// code generation support, whereas in non combined situations we would
2053 /// just emit 0 and the LastIteration expression
2054 /// This function is necessary due to the difference of the LB and UB
2055 /// types for the RT emission routines for 'for_static_init' and
2056 /// 'for_dispatch_init'
2057 static std::pair<llvm::Value *, llvm::Value *>
2058 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2059                                         const OMPExecutableDirective &S,
2060                                         Address LB, Address UB) {
2061   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2062   const Expr *IVExpr = LS.getIterationVariable();
2063   // when implementing a dynamic schedule for a 'for' combined with a
2064   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2065   // is not normalized as each team only executes its own assigned
2066   // distribute chunk
2067   QualType IteratorTy = IVExpr->getType();
2068   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
2069                                             SourceLocation());
2070   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
2071                                             SourceLocation());
2072   return {LBVal, UBVal};
2073 }
2074 
2075 static void emitDistributeParallelForDistributeInnerBoundParams(
2076     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2077     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2078   const auto &Dir = cast<OMPLoopDirective>(S);
2079   LValue LB =
2080       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2081   auto LBCast = CGF.Builder.CreateIntCast(
2082       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2083   CapturedVars.push_back(LBCast);
2084   LValue UB =
2085       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2086 
2087   auto UBCast = CGF.Builder.CreateIntCast(
2088       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2089   CapturedVars.push_back(UBCast);
2090 }
2091 
2092 static void
2093 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2094                                  const OMPLoopDirective &S,
2095                                  CodeGenFunction::JumpDest LoopExit) {
2096   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2097                                          PrePostActionTy &) {
2098     bool HasCancel = false;
2099     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2100       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2101         HasCancel = D->hasCancel();
2102       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2103         HasCancel = D->hasCancel();
2104       else if (const auto *D =
2105                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2106         HasCancel = D->hasCancel();
2107     }
2108     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2109                                                      HasCancel);
2110     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2111                                emitDistributeParallelForInnerBounds,
2112                                emitDistributeParallelForDispatchBounds);
2113   };
2114 
2115   emitCommonOMPParallelDirective(
2116       CGF, S,
2117       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2118       CGInlinedWorksharingLoop,
2119       emitDistributeParallelForDistributeInnerBoundParams);
2120 }
2121 
2122 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2123     const OMPDistributeParallelForDirective &S) {
2124   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2125     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2126                               S.getDistInc());
2127   };
2128   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2129   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2130 }
2131 
2132 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2133     const OMPDistributeParallelForSimdDirective &S) {
2134   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2135     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2136                               S.getDistInc());
2137   };
2138   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2139   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2140 }
2141 
2142 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2143     const OMPDistributeSimdDirective &S) {
2144   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2145     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2146   };
2147   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2148   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2149 }
2150 
2151 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2152     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2153   // Emit SPMD target parallel for region as a standalone region.
2154   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2155     emitOMPSimdRegion(CGF, S, Action);
2156   };
2157   llvm::Function *Fn;
2158   llvm::Constant *Addr;
2159   // Emit target region as a standalone region.
2160   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2161       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2162   assert(Fn && Addr && "Target device function emission failed.");
2163 }
2164 
2165 void CodeGenFunction::EmitOMPTargetSimdDirective(
2166     const OMPTargetSimdDirective &S) {
2167   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2168     emitOMPSimdRegion(CGF, S, Action);
2169   };
2170   emitCommonOMPTargetDirective(*this, S, CodeGen);
2171 }
2172 
2173 namespace {
2174   struct ScheduleKindModifiersTy {
2175     OpenMPScheduleClauseKind Kind;
2176     OpenMPScheduleClauseModifier M1;
2177     OpenMPScheduleClauseModifier M2;
2178     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2179                             OpenMPScheduleClauseModifier M1,
2180                             OpenMPScheduleClauseModifier M2)
2181         : Kind(Kind), M1(M1), M2(M2) {}
2182   };
2183 } // namespace
2184 
/// Emit code for the worksharing loop of the loop directive \p S.
///
/// After emitting the iteration variable and (when it is a variable) the
/// iteration count, the loop is emitted under the directive's precondition.
/// A static non-chunked schedule without 'ordered' is lowered to a single
/// 'for_static_init' / inner loop / 'for_static_fini' sequence; every other
/// case is delegated to EmitOMPForOuterLoop.
///
/// \param S Loop directive being emitted.
/// \param EUB Expression passed on, through OMPLoopArguments, to
/// EmitOMPForOuterLoop; it is not used on the static non-chunked path.
/// \param CodeGenLoopBounds Callback that yields the lower and upper bound
/// lvalues of the loop.
/// \param CGDispatchBounds Callback forwarded to EmitOMPForOuterLoop.
/// \returns false if the precondition constant-folds to false (the loop is
/// skipped entirely); otherwise the result of EmitOMPLastprivateClauseInit
/// for \p S.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Assigned on every path that reaches the final return: the only earlier
  // exit is the 'return false' for a constant-false precondition.
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant; in that case no
    // continuation block is needed at the end.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // An 'ordered' clause with a loop count triggers doacross initialization;
    // only an 'ordered' clause without one marks the loop as ordered here.
    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    // NOTE(review): EmittedFinals is not referenced anywhere else in this
    // function.
    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    // Lower/upper bound come from the caller-supplied callback; stride and
    // is-last-iteration flag are the directive's own helper variables.
    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      // Chunk stays null when there is no schedule clause or the clause has
      // no chunk size; otherwise it is converted to the iteration variable's
      // type.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        // Ordered is known to be false on this path (see the condition above).
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress());
        RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                         S.getDirectiveKind());
        };
        // The static-finish call is emitted through the cancel stack's exit
        // handling for this directive kind.
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Monotonic if ordered, if the schedule is static or unspecified, or
        // if either modifier requests it explicitly.
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      // For simd directives, emit the simd finalization, conditioned on
      // IsLastIter != 0.
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    // Linear clause finalization happens after the private scope above has
    // been closed; it is likewise conditioned on IsLastIter != 0.
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}
2370 
2371 /// The following two functions generate expressions for the loop lower
2372 /// and upper bounds in case of static and dynamic (dispatch) schedule
2373 /// of the associated 'for' or 'distribute' loop.
2374 static std::pair<LValue, LValue>
2375 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2376   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2377   LValue LB =
2378       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2379   LValue UB =
2380       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2381   return {LB, UB};
2382 }
2383 
2384 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2385 /// consider the lower and upper bound expressions generated by the
2386 /// worksharing loop support, but we use 0 and the iteration space size as
2387 /// constants
2388 static std::pair<llvm::Value *, llvm::Value *>
2389 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2390                           Address LB, Address UB) {
2391   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2392   const Expr *IVExpr = LS.getIterationVariable();
2393   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2394   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2395   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2396   return {LBVal, UBVal};
2397 }
2398 
2399 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2400   bool HasLastprivates = false;
2401   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2402                                           PrePostActionTy &) {
2403     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2404     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2405                                                  emitForLoopBounds,
2406                                                  emitDispatchForLoopBounds);
2407   };
2408   {
2409     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2410     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2411                                                 S.hasCancel());
2412   }
2413 
2414   // Emit an implicit barrier at the end.
2415   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2416     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2417   }
2418 }
2419 
2420 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2421   bool HasLastprivates = false;
2422   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2423                                           PrePostActionTy &) {
2424     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2425                                                  emitForLoopBounds,
2426                                                  emitDispatchForLoopBounds);
2427   };
2428   {
2429     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2430     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2431   }
2432 
2433   // Emit an implicit barrier at the end.
2434   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2435     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2436   }
2437 }
2438 
2439 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2440                                 const Twine &Name,
2441                                 llvm::Value *Init = nullptr) {
2442   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2443   if (Init)
2444     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2445   return LVal;
2446 }
2447 
/// Emit the sections of directive \p S as a statically scheduled loop.
///
/// The innermost captured statement is treated as a list of sections: each
/// child of a compound statement is one section, and any other statement is
/// a single section. A 32-bit signed counter runs over the section indices
/// handed out by the runtime's static init call, and a switch on that counter
/// selects the section to execute. Firstprivate, private, lastprivate and
/// reduction clauses of \p S are handled around the loop.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *Stmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // Null when the captured statement is not a compound statement, i.e. when
  // there is exactly one section.
  const auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Index of the last section; a non-compound statement yields 0.
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions bound to IV and UB so that the condition and the
    // increment below can be built as ordinary AST nodes.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart(), true);
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      // The exit block doubles as the switch's default destination.
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single section: the whole captured statement becomes case 0.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    // The static-finish call is emitted through the cancel stack's exit
    // handling for this directive kind.
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' directives are queried for a
  // cancel region; any other directive kind is treated as having none.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads; it is required here
    // because of the lastprivate variables (the firstprivate barrier is
    // emitted inside the region above).
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2587 
2588 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2589   {
2590     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2591     EmitSections(S);
2592   }
2593   // Emit an implicit barrier at the end.
2594   if (!S.getSingleClause<OMPNowaitClause>()) {
2595     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2596                                            OMPD_sections);
2597   }
2598 }
2599 
2600 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2601   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2602     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2603   };
2604   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2605   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2606                                               S.hasCancel());
2607 }
2608 
2609 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2610   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2611   llvm::SmallVector<const Expr *, 8> DestExprs;
2612   llvm::SmallVector<const Expr *, 8> SrcExprs;
2613   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2614   // Check if there are any 'copyprivate' clauses associated with this
2615   // 'single' construct.
2616   // Build a list of copyprivate variables along with helper expressions
2617   // (<source>, <destination>, <destination>=<source> expressions)
2618   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2619     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2620     DestExprs.append(C->destination_exprs().begin(),
2621                      C->destination_exprs().end());
2622     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2623     AssignmentOps.append(C->assignment_ops().begin(),
2624                          C->assignment_ops().end());
2625   }
2626   // Emit code for 'single' region along with 'copyprivate' clauses
2627   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2628     Action.Enter(CGF);
2629     OMPPrivateScope SingleScope(CGF);
2630     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2631     CGF.EmitOMPPrivateClause(S, SingleScope);
2632     (void)SingleScope.Privatize();
2633     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2634   };
2635   {
2636     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2637     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2638                                             CopyprivateVars, DestExprs,
2639                                             SrcExprs, AssignmentOps);
2640   }
2641   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2642   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2643   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2644     CGM.getOpenMPRuntime().emitBarrierCall(
2645         *this, S.getLocStart(),
2646         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2647   }
2648 }
2649 
2650 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2651   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2652     Action.Enter(CGF);
2653     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2654   };
2655   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2656   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2657 }
2658 
2659 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2660   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2661     Action.Enter(CGF);
2662     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2663   };
2664   Expr *Hint = nullptr;
2665   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2666     Hint = HintClause->getHint();
2667   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2668   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2669                                             S.getDirectiveName().getAsString(),
2670                                             CodeGen, S.getLocStart(), Hint);
2671 }
2672 
2673 void CodeGenFunction::EmitOMPParallelForDirective(
2674     const OMPParallelForDirective &S) {
2675   // Emit directive as a combined directive that consists of two implicit
2676   // directives: 'parallel' with 'for' directive.
2677   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2678     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2679     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2680                                emitDispatchForLoopBounds);
2681   };
2682   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2683                                  emitEmptyBoundParameters);
2684 }
2685 
2686 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2687     const OMPParallelForSimdDirective &S) {
2688   // Emit directive as a combined directive that consists of two implicit
2689   // directives: 'parallel' with 'for' directive.
2690   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2691     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2692                                emitDispatchForLoopBounds);
2693   };
2694   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2695                                  emitEmptyBoundParameters);
2696 }
2697 
2698 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2699     const OMPParallelSectionsDirective &S) {
2700   // Emit directive as a combined directive that consists of two implicit
2701   // directives: 'parallel' with 'sections' directive.
2702   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2703     CGF.EmitSections(S);
2704   };
2705   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2706                                  emitEmptyBoundParameters);
2707 }
2708 
2709 void CodeGenFunction::EmitOMPTaskBasedDirective(
2710     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
2711     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
2712     OMPTaskDataTy &Data) {
2713   // Emit outlined function for task construct.
2714   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
2715   auto *I = CS->getCapturedDecl()->param_begin();
2716   auto *PartId = std::next(I);
2717   auto *TaskT = std::next(I, 4);
2718   // Check if the task is final
2719   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2720     // If the condition constant folds and can be elided, try to avoid emitting
2721     // the condition and the dead arm of the if/else.
2722     auto *Cond = Clause->getCondition();
2723     bool CondConstant;
2724     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2725       Data.Final.setInt(CondConstant);
2726     else
2727       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2728   } else {
2729     // By default the task is not final.
2730     Data.Final.setInt(/*IntVal=*/false);
2731   }
2732   // Check if the task has 'priority' clause.
2733   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2734     auto *Prio = Clause->getPriority();
2735     Data.Priority.setInt(/*IntVal=*/true);
2736     Data.Priority.setPointer(EmitScalarConversion(
2737         EmitScalarExpr(Prio), Prio->getType(),
2738         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2739         Prio->getExprLoc()));
2740   }
2741   // The first function argument for tasks is a thread id, the second one is a
2742   // part id (0 for tied tasks, >=0 for untied task).
2743   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2744   // Get list of private variables.
2745   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2746     auto IRef = C->varlist_begin();
2747     for (auto *IInit : C->private_copies()) {
2748       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2749       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2750         Data.PrivateVars.push_back(*IRef);
2751         Data.PrivateCopies.push_back(IInit);
2752       }
2753       ++IRef;
2754     }
2755   }
2756   EmittedAsPrivate.clear();
2757   // Get list of firstprivate variables.
2758   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2759     auto IRef = C->varlist_begin();
2760     auto IElemInitRef = C->inits().begin();
2761     for (auto *IInit : C->private_copies()) {
2762       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2763       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2764         Data.FirstprivateVars.push_back(*IRef);
2765         Data.FirstprivateCopies.push_back(IInit);
2766         Data.FirstprivateInits.push_back(*IElemInitRef);
2767       }
2768       ++IRef;
2769       ++IElemInitRef;
2770     }
2771   }
2772   // Get list of lastprivate variables (for taskloops).
2773   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2774   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2775     auto IRef = C->varlist_begin();
2776     auto ID = C->destination_exprs().begin();
2777     for (auto *IInit : C->private_copies()) {
2778       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2779       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2780         Data.LastprivateVars.push_back(*IRef);
2781         Data.LastprivateCopies.push_back(IInit);
2782       }
2783       LastprivateDstsOrigs.insert(
2784           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2785            cast<DeclRefExpr>(*IRef)});
2786       ++IRef;
2787       ++ID;
2788     }
2789   }
2790   SmallVector<const Expr *, 4> LHSs;
2791   SmallVector<const Expr *, 4> RHSs;
2792   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2793     auto IPriv = C->privates().begin();
2794     auto IRed = C->reduction_ops().begin();
2795     auto ILHS = C->lhs_exprs().begin();
2796     auto IRHS = C->rhs_exprs().begin();
2797     for (const auto *Ref : C->varlists()) {
2798       Data.ReductionVars.emplace_back(Ref);
2799       Data.ReductionCopies.emplace_back(*IPriv);
2800       Data.ReductionOps.emplace_back(*IRed);
2801       LHSs.emplace_back(*ILHS);
2802       RHSs.emplace_back(*IRHS);
2803       std::advance(IPriv, 1);
2804       std::advance(IRed, 1);
2805       std::advance(ILHS, 1);
2806       std::advance(IRHS, 1);
2807     }
2808   }
2809   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2810       *this, S.getLocStart(), LHSs, RHSs, Data);
2811   // Build list of dependences.
2812   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2813     for (auto *IRef : C->varlists())
2814       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2815   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
2816                     CapturedRegion](CodeGenFunction &CGF,
2817                                     PrePostActionTy &Action) {
2818     // Set proper addresses for generated private copies.
2819     OMPPrivateScope Scope(CGF);
2820     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2821         !Data.LastprivateVars.empty()) {
2822       enum { PrivatesParam = 2, CopyFnParam = 3 };
2823       auto *CopyFn = CGF.Builder.CreateLoad(
2824           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2825       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2826           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2827       // Map privates.
2828       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2829       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2830       CallArgs.push_back(PrivatesPtr);
2831       for (auto *E : Data.PrivateVars) {
2832         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2833         Address PrivatePtr = CGF.CreateMemTemp(
2834             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2835         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2836         CallArgs.push_back(PrivatePtr.getPointer());
2837       }
2838       for (auto *E : Data.FirstprivateVars) {
2839         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2840         Address PrivatePtr =
2841             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2842                               ".firstpriv.ptr.addr");
2843         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2844         CallArgs.push_back(PrivatePtr.getPointer());
2845       }
2846       for (auto *E : Data.LastprivateVars) {
2847         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2848         Address PrivatePtr =
2849             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2850                               ".lastpriv.ptr.addr");
2851         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2852         CallArgs.push_back(PrivatePtr.getPointer());
2853       }
2854       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2855                                                           CopyFn, CallArgs);
2856       for (auto &&Pair : LastprivateDstsOrigs) {
2857         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2858         DeclRefExpr DRE(
2859             const_cast<VarDecl *>(OrigVD),
2860             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2861                 OrigVD) != nullptr,
2862             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2863         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2864           return CGF.EmitLValue(&DRE).getAddress();
2865         });
2866       }
2867       for (auto &&Pair : PrivatePtrs) {
2868         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2869                             CGF.getContext().getDeclAlign(Pair.first));
2870         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2871       }
2872     }
2873     if (Data.Reductions) {
2874       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
2875       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2876                              Data.ReductionOps);
2877       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2878           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2879       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2880         RedCG.emitSharedLValue(CGF, Cnt);
2881         RedCG.emitAggregateType(CGF, Cnt);
2882         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2883             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2884         Replacement =
2885             Address(CGF.EmitScalarConversion(
2886                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2887                         CGF.getContext().getPointerType(
2888                             Data.ReductionCopies[Cnt]->getType()),
2889                         SourceLocation()),
2890                     Replacement.getAlignment());
2891         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2892         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2893                          [Replacement]() { return Replacement; });
2894         // FIXME: This must removed once the runtime library is fixed.
2895         // Emit required threadprivate variables for
2896         // initilizer/combiner/finalizer.
2897         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2898                                                            RedCG, Cnt);
2899       }
2900     }
2901     // Privatize all private variables except for in_reduction items.
2902     (void)Scope.Privatize();
2903     SmallVector<const Expr *, 4> InRedVars;
2904     SmallVector<const Expr *, 4> InRedPrivs;
2905     SmallVector<const Expr *, 4> InRedOps;
2906     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2907     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2908       auto IPriv = C->privates().begin();
2909       auto IRed = C->reduction_ops().begin();
2910       auto ITD = C->taskgroup_descriptors().begin();
2911       for (const auto *Ref : C->varlists()) {
2912         InRedVars.emplace_back(Ref);
2913         InRedPrivs.emplace_back(*IPriv);
2914         InRedOps.emplace_back(*IRed);
2915         TaskgroupDescriptors.emplace_back(*ITD);
2916         std::advance(IPriv, 1);
2917         std::advance(IRed, 1);
2918         std::advance(ITD, 1);
2919       }
2920     }
2921     // Privatize in_reduction items here, because taskgroup descriptors must be
2922     // privatized earlier.
2923     OMPPrivateScope InRedScope(CGF);
2924     if (!InRedVars.empty()) {
2925       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2926       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2927         RedCG.emitSharedLValue(CGF, Cnt);
2928         RedCG.emitAggregateType(CGF, Cnt);
2929         // The taskgroup descriptor variable is always implicit firstprivate and
2930         // privatized already during procoessing of the firstprivates.
2931         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2932             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2933         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2934             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2935         Replacement = Address(
2936             CGF.EmitScalarConversion(
2937                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2938                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2939                 SourceLocation()),
2940             Replacement.getAlignment());
2941         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2942         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2943                               [Replacement]() { return Replacement; });
2944         // FIXME: This must removed once the runtime library is fixed.
2945         // Emit required threadprivate variables for
2946         // initilizer/combiner/finalizer.
2947         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2948                                                            RedCG, Cnt);
2949       }
2950     }
2951     (void)InRedScope.Privatize();
2952 
2953     Action.Enter(CGF);
2954     BodyGen(CGF);
2955   };
2956   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2957       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2958       Data.NumberOfParts);
2959   OMPLexicalScope Scope(*this, S);
2960   TaskGen(*this, OutlinedFn, Data);
2961 }
2962 
2963 static ImplicitParamDecl *
2964 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
2965                                   QualType Ty, CapturedDecl *CD) {
2966   auto *OrigVD = ImplicitParamDecl::Create(
2967       C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other);
2968   auto *OrigRef =
2969       DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
2970                           /*RefersToEnclosingVariableOrCapture=*/false,
2971                           SourceLocation(), Ty, VK_LValue);
2972   auto *PrivateVD = ImplicitParamDecl::Create(
2973       C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other);
2974   auto *PrivateRef = DeclRefExpr::Create(
2975       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
2976       /*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(), Ty,
2977       VK_LValue);
2978   QualType ElemType = C.getBaseElementType(Ty);
2979   auto *InitVD =
2980       ImplicitParamDecl::Create(C, CD, SourceLocation(), /*Id=*/nullptr,
2981                                 ElemType, ImplicitParamDecl::Other);
2982   auto *InitRef =
2983       DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
2984                           /*RefersToEnclosingVariableOrCapture=*/false,
2985                           SourceLocation(), ElemType, VK_LValue);
2986   PrivateVD->setInitStyle(VarDecl::CInit);
2987   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
2988                                               InitRef, /*BasePath=*/nullptr,
2989                                               VK_RValue));
2990   Data.FirstprivateVars.emplace_back(OrigRef);
2991   Data.FirstprivateCopies.emplace_back(PrivateRef);
2992   Data.FirstprivateInits.emplace_back(InitRef);
2993   return OrigVD;
2994 }
2995 
2996 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
2997     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
2998     OMPTargetDataInfo &InputInfo) {
2999   // Emit outlined function for task construct.
3000   auto CS = S.getCapturedStmt(OMPD_task);
3001   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3002   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3003   auto *I = CS->getCapturedDecl()->param_begin();
3004   auto *PartId = std::next(I);
3005   auto *TaskT = std::next(I, 4);
3006   OMPTaskDataTy Data;
3007   // The task is not final.
3008   Data.Final.setInt(/*IntVal=*/false);
3009   // Get list of firstprivate variables.
3010   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3011     auto IRef = C->varlist_begin();
3012     auto IElemInitRef = C->inits().begin();
3013     for (auto *IInit : C->private_copies()) {
3014       Data.FirstprivateVars.push_back(*IRef);
3015       Data.FirstprivateCopies.push_back(IInit);
3016       Data.FirstprivateInits.push_back(*IElemInitRef);
3017       ++IRef;
3018       ++IElemInitRef;
3019     }
3020   }
3021   OMPPrivateScope TargetScope(*this);
3022   VarDecl *BPVD = nullptr;
3023   VarDecl *PVD = nullptr;
3024   VarDecl *SVD = nullptr;
3025   if (InputInfo.NumberOfTargetItems > 0) {
3026     auto *CD = CapturedDecl::Create(
3027         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
3028     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
3029     QualType BaseAndPointersType = getContext().getConstantArrayType(
3030         getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
3031         /*IndexTypeQuals=*/0);
3032     BPVD = createImplicitFirstprivateForType(getContext(), Data,
3033                                              BaseAndPointersType, CD);
3034     PVD = createImplicitFirstprivateForType(getContext(), Data,
3035                                             BaseAndPointersType, CD);
3036     QualType SizesType = getContext().getConstantArrayType(
3037         getContext().getSizeType(), ArrSize, ArrayType::Normal,
3038         /*IndexTypeQuals=*/0);
3039     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD);
3040     TargetScope.addPrivate(
3041         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
3042     TargetScope.addPrivate(PVD,
3043                            [&InputInfo]() { return InputInfo.PointersArray; });
3044     TargetScope.addPrivate(SVD,
3045                            [&InputInfo]() { return InputInfo.SizesArray; });
3046   }
3047   (void)TargetScope.Privatize();
3048   // Build list of dependences.
3049   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
3050     for (auto *IRef : C->varlists())
3051       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
3052   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
3053                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
3054     // Set proper addresses for generated private copies.
3055     OMPPrivateScope Scope(CGF);
3056     if (!Data.FirstprivateVars.empty()) {
3057       enum { PrivatesParam = 2, CopyFnParam = 3 };
3058       auto *CopyFn = CGF.Builder.CreateLoad(
3059           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
3060       auto *PrivatesPtr = CGF.Builder.CreateLoad(
3061           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
3062       // Map privates.
3063       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3064       llvm::SmallVector<llvm::Value *, 16> CallArgs;
3065       CallArgs.push_back(PrivatesPtr);
3066       for (auto *E : Data.FirstprivateVars) {
3067         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3068         Address PrivatePtr =
3069             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3070                               ".firstpriv.ptr.addr");
3071         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
3072         CallArgs.push_back(PrivatePtr.getPointer());
3073       }
3074       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3075                                                           CopyFn, CallArgs);
3076       for (auto &&Pair : PrivatePtrs) {
3077         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3078                             CGF.getContext().getDeclAlign(Pair.first));
3079         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
3080       }
3081     }
3082     // Privatize all private variables except for in_reduction items.
3083     (void)Scope.Privatize();
3084     if (InputInfo.NumberOfTargetItems > 0) {
3085       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
3086           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
3087       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
3088           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
3089       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
3090           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
3091     }
3092 
3093     Action.Enter(CGF);
3094     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
3095     BodyGen(CGF);
3096   };
3097   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
3098       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
3099       Data.NumberOfParts);
3100   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
3101   IntegerLiteral IfCond(getContext(), TrueOrFalse,
3102                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3103                         SourceLocation());
3104 
3105   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn,
3106                                       SharedsTy, CapturedStruct, &IfCond, Data);
3107 }
3108 
3109 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
3110   // Emit outlined function for task construct.
3111   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
3112   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3113   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3114   const Expr *IfCond = nullptr;
3115   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3116     if (C->getNameModifier() == OMPD_unknown ||
3117         C->getNameModifier() == OMPD_task) {
3118       IfCond = C->getCondition();
3119       break;
3120     }
3121   }
3122 
3123   OMPTaskDataTy Data;
3124   // Check if we should emit tied or untied task.
3125   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
3126   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3127     CGF.EmitStmt(CS->getCapturedStmt());
3128   };
3129   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
3130                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
3131                             const OMPTaskDataTy &Data) {
3132     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
3133                                             SharedsTy, CapturedStruct, IfCond,
3134                                             Data);
3135   };
3136   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
3137 }
3138 
3139 void CodeGenFunction::EmitOMPTaskyieldDirective(
3140     const OMPTaskyieldDirective &S) {
3141   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
3142 }
3143 
3144 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
3145   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
3146 }
3147 
3148 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
3149   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
3150 }
3151 
3152 void CodeGenFunction::EmitOMPTaskgroupDirective(
3153     const OMPTaskgroupDirective &S) {
3154   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3155     Action.Enter(CGF);
3156     if (const Expr *E = S.getReductionRef()) {
3157       SmallVector<const Expr *, 4> LHSs;
3158       SmallVector<const Expr *, 4> RHSs;
3159       OMPTaskDataTy Data;
3160       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
3161         auto IPriv = C->privates().begin();
3162         auto IRed = C->reduction_ops().begin();
3163         auto ILHS = C->lhs_exprs().begin();
3164         auto IRHS = C->rhs_exprs().begin();
3165         for (const auto *Ref : C->varlists()) {
3166           Data.ReductionVars.emplace_back(Ref);
3167           Data.ReductionCopies.emplace_back(*IPriv);
3168           Data.ReductionOps.emplace_back(*IRed);
3169           LHSs.emplace_back(*ILHS);
3170           RHSs.emplace_back(*IRHS);
3171           std::advance(IPriv, 1);
3172           std::advance(IRed, 1);
3173           std::advance(ILHS, 1);
3174           std::advance(IRHS, 1);
3175         }
3176       }
3177       llvm::Value *ReductionDesc =
3178           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
3179                                                            LHSs, RHSs, Data);
3180       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3181       CGF.EmitVarDecl(*VD);
3182       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
3183                             /*Volatile=*/false, E->getType());
3184     }
3185     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3186   };
3187   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3188   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3189 }
3190 
3191 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3192   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3193     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3194       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3195                                 FlushClause->varlist_end());
3196     }
3197     return llvm::None;
3198   }(), S.getLocStart());
3199 }
3200 
3201 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3202                                             const CodeGenLoopTy &CodeGenLoop,
3203                                             Expr *IncExpr) {
3204   // Emit the loop iteration variable.
3205   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3206   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3207   EmitVarDecl(*IVDecl);
3208 
3209   // Emit the iterations count variable.
3210   // If it is not a variable, Sema decided to calculate iterations count on each
3211   // iteration (e.g., it is foldable into a constant).
3212   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3213     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3214     // Emit calculation of the iterations count.
3215     EmitIgnoredExpr(S.getCalcLastIteration());
3216   }
3217 
3218   auto &RT = CGM.getOpenMPRuntime();
3219 
3220   bool HasLastprivateClause = false;
3221   // Check pre-condition.
3222   {
3223     OMPLoopScope PreInitScope(*this, S);
3224     // Skip the entire loop if we don't meet the precondition.
3225     // If the condition constant folds and can be elided, avoid emitting the
3226     // whole loop.
3227     bool CondConstant;
3228     llvm::BasicBlock *ContBlock = nullptr;
3229     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3230       if (!CondConstant)
3231         return;
3232     } else {
3233       auto *ThenBlock = createBasicBlock("omp.precond.then");
3234       ContBlock = createBasicBlock("omp.precond.end");
3235       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3236                   getProfileCount(&S));
3237       EmitBlock(ThenBlock);
3238       incrementProfileCounter(&S);
3239     }
3240 
3241     emitAlignedClause(*this, S);
3242     // Emit 'then' code.
3243     {
3244       // Emit helper vars inits.
3245 
3246       LValue LB = EmitOMPHelperVar(
3247           *this, cast<DeclRefExpr>(
3248                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3249                           ? S.getCombinedLowerBoundVariable()
3250                           : S.getLowerBoundVariable())));
3251       LValue UB = EmitOMPHelperVar(
3252           *this, cast<DeclRefExpr>(
3253                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3254                           ? S.getCombinedUpperBoundVariable()
3255                           : S.getUpperBoundVariable())));
3256       LValue ST =
3257           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3258       LValue IL =
3259           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3260 
3261       OMPPrivateScope LoopScope(*this);
3262       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3263         // Emit implicit barrier to synchronize threads and avoid data races
3264         // on initialization of firstprivate variables and post-update of
3265         // lastprivate variables.
3266         CGM.getOpenMPRuntime().emitBarrierCall(
3267             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3268             /*ForceSimpleCall=*/true);
3269       }
3270       EmitOMPPrivateClause(S, LoopScope);
3271       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3272           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3273           !isOpenMPTeamsDirective(S.getDirectiveKind()))
3274         EmitOMPReductionClauseInit(S, LoopScope);
3275       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3276       EmitOMPPrivateLoopCounters(S, LoopScope);
3277       (void)LoopScope.Privatize();
3278 
3279       // Detect the distribute schedule kind and chunk.
3280       llvm::Value *Chunk = nullptr;
3281       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3282       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3283         ScheduleKind = C->getDistScheduleKind();
3284         if (const auto *Ch = C->getChunkSize()) {
3285           Chunk = EmitScalarExpr(Ch);
3286           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3287                                        S.getIterationVariable()->getType(),
3288                                        S.getLocStart());
3289         }
3290       }
3291       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3292       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3293 
3294       // OpenMP [2.10.8, distribute Construct, Description]
3295       // If dist_schedule is specified, kind must be static. If specified,
3296       // iterations are divided into chunks of size chunk_size, chunks are
3297       // assigned to the teams of the league in a round-robin fashion in the
3298       // order of the team number. When no chunk_size is specified, the
3299       // iteration space is divided into chunks that are approximately equal
3300       // in size, and at most one chunk is distributed to each team of the
3301       // league. The size of the chunks is unspecified in this case.
3302       if (RT.isStaticNonchunked(ScheduleKind,
3303                                 /* Chunked */ Chunk != nullptr)) {
3304         if (isOpenMPSimdDirective(S.getDirectiveKind()))
3305           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
3306         CGOpenMPRuntime::StaticRTInput StaticInit(
3307             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3308             LB.getAddress(), UB.getAddress(), ST.getAddress());
3309         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3310                                     StaticInit);
3311         auto LoopExit =
3312             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3313         // UB = min(UB, GlobalUB);
3314         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3315                             ? S.getCombinedEnsureUpperBound()
3316                             : S.getEnsureUpperBound());
3317         // IV = LB;
3318         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3319                             ? S.getCombinedInit()
3320                             : S.getInit());
3321 
3322         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3323                          ? S.getCombinedCond()
3324                          : S.getCond();
3325 
3326         // for distribute alone,  codegen
3327         // while (idx <= UB) { BODY; ++idx; }
3328         // when combined with 'for' (e.g. as in 'distribute parallel for')
3329         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3330         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3331                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3332                            CodeGenLoop(CGF, S, LoopExit);
3333                          },
3334                          [](CodeGenFunction &) {});
3335         EmitBlock(LoopExit.getBlock());
3336         // Tell the runtime we are done.
3337         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3338       } else {
3339         // Emit the outer loop, which requests its work chunk [LB..UB] from
3340         // runtime and runs the inner loop to process it.
3341         const OMPLoopArguments LoopArguments = {
3342             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3343             Chunk};
3344         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3345                                    CodeGenLoop);
3346       }
3347       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3348         EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3349           return CGF.Builder.CreateIsNotNull(
3350               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3351         });
3352       }
3353       OpenMPDirectiveKind ReductionKind = OMPD_unknown;
3354       if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
3355           isOpenMPSimdDirective(S.getDirectiveKind())) {
3356         ReductionKind = OMPD_parallel_for_simd;
3357       } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3358         ReductionKind = OMPD_parallel_for;
3359       } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3360         ReductionKind = OMPD_simd;
3361       } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
3362                  S.hasClausesOfKind<OMPReductionClause>()) {
3363         llvm_unreachable(
3364             "No reduction clauses is allowed in distribute directive.");
3365       }
3366       EmitOMPReductionClauseFinal(S, ReductionKind);
3367       // Emit post-update of the reduction variables if IsLastIter != 0.
3368       emitPostUpdateForReductionClause(
3369           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3370             return CGF.Builder.CreateIsNotNull(
3371                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3372           });
3373       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3374       if (HasLastprivateClause) {
3375         EmitOMPLastprivateClauseFinal(
3376             S, /*NoFinals=*/false,
3377             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
3378       }
3379     }
3380 
3381     // We're now done with the loop, so jump to the continuation block.
3382     if (ContBlock) {
3383       EmitBranch(ContBlock);
3384       EmitBlock(ContBlock, true);
3385     }
3386   }
3387 }
3388 
3389 void CodeGenFunction::EmitOMPDistributeDirective(
3390     const OMPDistributeDirective &S) {
3391   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3392 
3393     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3394   };
3395   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3396   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3397 }
3398 
3399 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3400                                                    const CapturedStmt *S) {
3401   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3402   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3403   CGF.CapturedStmtInfo = &CapStmtInfo;
3404   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3405   Fn->addFnAttr(llvm::Attribute::NoInline);
3406   return Fn;
3407 }
3408 
3409 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3410   if (S.hasClausesOfKind<OMPDependClause>()) {
3411     assert(!S.getAssociatedStmt() &&
3412            "No associated statement must be in ordered depend construct.");
3413     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3414       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3415     return;
3416   }
3417   auto *C = S.getSingleClause<OMPSIMDClause>();
3418   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3419                                  PrePostActionTy &Action) {
3420     const CapturedStmt *CS = S.getInnermostCapturedStmt();
3421     if (C) {
3422       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3423       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3424       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3425       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3426                                                       OutlinedFn, CapturedVars);
3427     } else {
3428       Action.Enter(CGF);
3429       CGF.EmitStmt(CS->getCapturedStmt());
3430     }
3431   };
3432   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3433   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3434 }
3435 
3436 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3437                                          QualType SrcType, QualType DestType,
3438                                          SourceLocation Loc) {
3439   assert(CGF.hasScalarEvaluationKind(DestType) &&
3440          "DestType must have scalar evaluation kind.");
3441   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3442   return Val.isScalar()
3443              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3444                                         Loc)
3445              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3446                                                  DestType, Loc);
3447 }
3448 
3449 static CodeGenFunction::ComplexPairTy
3450 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3451                       QualType DestType, SourceLocation Loc) {
3452   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3453          "DestType must have complex evaluation kind.");
3454   CodeGenFunction::ComplexPairTy ComplexVal;
3455   if (Val.isScalar()) {
3456     // Convert the input element to the element type of the complex.
3457     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3458     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3459                                               DestElementType, Loc);
3460     ComplexVal = CodeGenFunction::ComplexPairTy(
3461         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3462   } else {
3463     assert(Val.isComplex() && "Must be a scalar or complex.");
3464     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3465     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3466     ComplexVal.first = CGF.EmitScalarConversion(
3467         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3468     ComplexVal.second = CGF.EmitScalarConversion(
3469         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3470   }
3471   return ComplexVal;
3472 }
3473 
3474 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3475                                   LValue LVal, RValue RVal) {
3476   if (LVal.isGlobalReg()) {
3477     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3478   } else {
3479     CGF.EmitAtomicStore(RVal, LVal,
3480                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3481                                  : llvm::AtomicOrdering::Monotonic,
3482                         LVal.isVolatile(), /*IsInit=*/false);
3483   }
3484 }
3485 
3486 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3487                                          QualType RValTy, SourceLocation Loc) {
3488   switch (getEvaluationKind(LVal.getType())) {
3489   case TEK_Scalar:
3490     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3491                                *this, RVal, RValTy, LVal.getType(), Loc)),
3492                            LVal);
3493     break;
3494   case TEK_Complex:
3495     EmitStoreOfComplex(
3496         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3497         /*isInit=*/false);
3498     break;
3499   case TEK_Aggregate:
3500     llvm_unreachable("Must be a scalar or complex.");
3501   }
3502 }
3503 
3504 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3505                                   const Expr *X, const Expr *V,
3506                                   SourceLocation Loc) {
3507   // v = x;
3508   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3509   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3510   LValue XLValue = CGF.EmitLValue(X);
3511   LValue VLValue = CGF.EmitLValue(V);
3512   RValue Res = XLValue.isGlobalReg()
3513                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3514                    : CGF.EmitAtomicLoad(
3515                          XLValue, Loc,
3516                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3517                                   : llvm::AtomicOrdering::Monotonic,
3518                          XLValue.isVolatile());
3519   // OpenMP, 2.12.6, atomic Construct
3520   // Any atomic construct with a seq_cst clause forces the atomically
3521   // performed operation to include an implicit flush operation without a
3522   // list.
3523   if (IsSeqCst)
3524     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3525   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3526 }
3527 
3528 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3529                                    const Expr *X, const Expr *E,
3530                                    SourceLocation Loc) {
3531   // x = expr;
3532   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3533   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3534   // OpenMP, 2.12.6, atomic Construct
3535   // Any atomic construct with a seq_cst clause forces the atomically
3536   // performed operation to include an implicit flush operation without a
3537   // list.
3538   if (IsSeqCst)
3539     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3540 }
3541 
3542 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3543                                                 RValue Update,
3544                                                 BinaryOperatorKind BO,
3545                                                 llvm::AtomicOrdering AO,
3546                                                 bool IsXLHSInRHSPart) {
3547   auto &Context = CGF.CGM.getContext();
3548   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3549   // expression is simple and atomic is allowed for the given type for the
3550   // target platform.
3551   if (BO == BO_Comma || !Update.isScalar() ||
3552       !Update.getScalarVal()->getType()->isIntegerTy() ||
3553       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3554                         (Update.getScalarVal()->getType() !=
3555                          X.getAddress().getElementType())) ||
3556       !X.getAddress().getElementType()->isIntegerTy() ||
3557       !Context.getTargetInfo().hasBuiltinAtomic(
3558           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3559     return std::make_pair(false, RValue::get(nullptr));
3560 
3561   llvm::AtomicRMWInst::BinOp RMWOp;
3562   switch (BO) {
3563   case BO_Add:
3564     RMWOp = llvm::AtomicRMWInst::Add;
3565     break;
3566   case BO_Sub:
3567     if (!IsXLHSInRHSPart)
3568       return std::make_pair(false, RValue::get(nullptr));
3569     RMWOp = llvm::AtomicRMWInst::Sub;
3570     break;
3571   case BO_And:
3572     RMWOp = llvm::AtomicRMWInst::And;
3573     break;
3574   case BO_Or:
3575     RMWOp = llvm::AtomicRMWInst::Or;
3576     break;
3577   case BO_Xor:
3578     RMWOp = llvm::AtomicRMWInst::Xor;
3579     break;
3580   case BO_LT:
3581     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3582                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3583                                    : llvm::AtomicRMWInst::Max)
3584                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3585                                    : llvm::AtomicRMWInst::UMax);
3586     break;
3587   case BO_GT:
3588     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3589                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3590                                    : llvm::AtomicRMWInst::Min)
3591                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3592                                    : llvm::AtomicRMWInst::UMin);
3593     break;
3594   case BO_Assign:
3595     RMWOp = llvm::AtomicRMWInst::Xchg;
3596     break;
3597   case BO_Mul:
3598   case BO_Div:
3599   case BO_Rem:
3600   case BO_Shl:
3601   case BO_Shr:
3602   case BO_LAnd:
3603   case BO_LOr:
3604     return std::make_pair(false, RValue::get(nullptr));
3605   case BO_PtrMemD:
3606   case BO_PtrMemI:
3607   case BO_LE:
3608   case BO_GE:
3609   case BO_EQ:
3610   case BO_NE:
3611   case BO_Cmp:
3612   case BO_AddAssign:
3613   case BO_SubAssign:
3614   case BO_AndAssign:
3615   case BO_OrAssign:
3616   case BO_XorAssign:
3617   case BO_MulAssign:
3618   case BO_DivAssign:
3619   case BO_RemAssign:
3620   case BO_ShlAssign:
3621   case BO_ShrAssign:
3622   case BO_Comma:
3623     llvm_unreachable("Unsupported atomic update operation");
3624   }
3625   auto *UpdateVal = Update.getScalarVal();
3626   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3627     UpdateVal = CGF.Builder.CreateIntCast(
3628         IC, X.getAddress().getElementType(),
3629         X.getType()->hasSignedIntegerRepresentation());
3630   }
3631   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3632   return std::make_pair(true, RValue::get(Res));
3633 }
3634 
3635 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3636     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3637     llvm::AtomicOrdering AO, SourceLocation Loc,
3638     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3639   // Update expressions are allowed to have the following forms:
3640   // x binop= expr; -> xrval + expr;
3641   // x++, ++x -> xrval + 1;
3642   // x--, --x -> xrval - 1;
3643   // x = x binop expr; -> xrval binop expr
3644   // x = expr Op x; - > expr binop xrval;
3645   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3646   if (!Res.first) {
3647     if (X.isGlobalReg()) {
3648       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3649       // 'xrval'.
3650       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3651     } else {
3652       // Perform compare-and-swap procedure.
3653       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3654     }
3655   }
3656   return Res;
3657 }
3658 
3659 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3660                                     const Expr *X, const Expr *E,
3661                                     const Expr *UE, bool IsXLHSInRHSPart,
3662                                     SourceLocation Loc) {
3663   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3664          "Update expr in 'atomic update' must be a binary operator.");
3665   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3666   // Update expressions are allowed to have the following forms:
3667   // x binop= expr; -> xrval + expr;
3668   // x++, ++x -> xrval + 1;
3669   // x--, --x -> xrval - 1;
3670   // x = x binop expr; -> xrval binop expr
3671   // x = expr Op x; - > expr binop xrval;
3672   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3673   LValue XLValue = CGF.EmitLValue(X);
3674   RValue ExprRValue = CGF.EmitAnyExpr(E);
3675   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3676                      : llvm::AtomicOrdering::Monotonic;
3677   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3678   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3679   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3680   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3681   auto Gen =
3682       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3683         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3684         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3685         return CGF.EmitAnyExpr(UE);
3686       };
3687   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3688       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3689   // OpenMP, 2.12.6, atomic Construct
3690   // Any atomic construct with a seq_cst clause forces the atomically
3691   // performed operation to include an implicit flush operation without a
3692   // list.
3693   if (IsSeqCst)
3694     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3695 }
3696 
3697 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3698                             QualType SourceType, QualType ResType,
3699                             SourceLocation Loc) {
3700   switch (CGF.getEvaluationKind(ResType)) {
3701   case TEK_Scalar:
3702     return RValue::get(
3703         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3704   case TEK_Complex: {
3705     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3706     return RValue::getComplex(Res.first, Res.second);
3707   }
3708   case TEK_Aggregate:
3709     break;
3710   }
3711   llvm_unreachable("Must be a scalar or complex.");
3712 }
3713 
3714 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3715                                      bool IsPostfixUpdate, const Expr *V,
3716                                      const Expr *X, const Expr *E,
3717                                      const Expr *UE, bool IsXLHSInRHSPart,
3718                                      SourceLocation Loc) {
3719   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3720   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3721   RValue NewVVal;
3722   LValue VLValue = CGF.EmitLValue(V);
3723   LValue XLValue = CGF.EmitLValue(X);
3724   RValue ExprRValue = CGF.EmitAnyExpr(E);
3725   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3726                      : llvm::AtomicOrdering::Monotonic;
3727   QualType NewVValType;
3728   if (UE) {
3729     // 'x' is updated with some additional value.
3730     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3731            "Update expr in 'atomic capture' must be a binary operator.");
3732     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3733     // Update expressions are allowed to have the following forms:
3734     // x binop= expr; -> xrval + expr;
3735     // x++, ++x -> xrval + 1;
3736     // x--, --x -> xrval - 1;
3737     // x = x binop expr; -> xrval binop expr
3738     // x = expr Op x; - > expr binop xrval;
3739     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3740     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3741     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3742     NewVValType = XRValExpr->getType();
3743     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3744     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3745                   IsPostfixUpdate](RValue XRValue) -> RValue {
3746       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3747       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3748       RValue Res = CGF.EmitAnyExpr(UE);
3749       NewVVal = IsPostfixUpdate ? XRValue : Res;
3750       return Res;
3751     };
3752     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3753         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3754     if (Res.first) {
3755       // 'atomicrmw' instruction was generated.
3756       if (IsPostfixUpdate) {
3757         // Use old value from 'atomicrmw'.
3758         NewVVal = Res.second;
3759       } else {
3760         // 'atomicrmw' does not provide new value, so evaluate it using old
3761         // value of 'x'.
3762         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3763         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3764         NewVVal = CGF.EmitAnyExpr(UE);
3765       }
3766     }
3767   } else {
3768     // 'x' is simply rewritten with some 'expr'.
3769     NewVValType = X->getType().getNonReferenceType();
3770     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3771                                X->getType().getNonReferenceType(), Loc);
3772     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3773       NewVVal = XRValue;
3774       return ExprRValue;
3775     };
3776     // Try to perform atomicrmw xchg, otherwise simple exchange.
3777     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3778         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3779         Loc, Gen);
3780     if (Res.first) {
3781       // 'atomicrmw' instruction was generated.
3782       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3783     }
3784   }
3785   // Emit post-update store to 'v' of old/new 'x' value.
3786   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3787   // OpenMP, 2.12.6, atomic Construct
3788   // Any atomic construct with a seq_cst clause forces the atomically
3789   // performed operation to include an implicit flush operation without a
3790   // list.
3791   if (IsSeqCst)
3792     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3793 }
3794 
3795 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3796                               bool IsSeqCst, bool IsPostfixUpdate,
3797                               const Expr *X, const Expr *V, const Expr *E,
3798                               const Expr *UE, bool IsXLHSInRHSPart,
3799                               SourceLocation Loc) {
3800   switch (Kind) {
3801   case OMPC_read:
3802     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3803     break;
3804   case OMPC_write:
3805     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3806     break;
3807   case OMPC_unknown:
3808   case OMPC_update:
3809     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3810     break;
3811   case OMPC_capture:
3812     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3813                              IsXLHSInRHSPart, Loc);
3814     break;
3815   case OMPC_if:
3816   case OMPC_final:
3817   case OMPC_num_threads:
3818   case OMPC_private:
3819   case OMPC_firstprivate:
3820   case OMPC_lastprivate:
3821   case OMPC_reduction:
3822   case OMPC_task_reduction:
3823   case OMPC_in_reduction:
3824   case OMPC_safelen:
3825   case OMPC_simdlen:
3826   case OMPC_collapse:
3827   case OMPC_default:
3828   case OMPC_seq_cst:
3829   case OMPC_shared:
3830   case OMPC_linear:
3831   case OMPC_aligned:
3832   case OMPC_copyin:
3833   case OMPC_copyprivate:
3834   case OMPC_flush:
3835   case OMPC_proc_bind:
3836   case OMPC_schedule:
3837   case OMPC_ordered:
3838   case OMPC_nowait:
3839   case OMPC_untied:
3840   case OMPC_threadprivate:
3841   case OMPC_depend:
3842   case OMPC_mergeable:
3843   case OMPC_device:
3844   case OMPC_threads:
3845   case OMPC_simd:
3846   case OMPC_map:
3847   case OMPC_num_teams:
3848   case OMPC_thread_limit:
3849   case OMPC_priority:
3850   case OMPC_grainsize:
3851   case OMPC_nogroup:
3852   case OMPC_num_tasks:
3853   case OMPC_hint:
3854   case OMPC_dist_schedule:
3855   case OMPC_defaultmap:
3856   case OMPC_uniform:
3857   case OMPC_to:
3858   case OMPC_from:
3859   case OMPC_use_device_ptr:
3860   case OMPC_is_device_ptr:
3861     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3862   }
3863 }
3864 
3865 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3866   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3867   OpenMPClauseKind Kind = OMPC_unknown;
3868   for (auto *C : S.clauses()) {
3869     // Find first clause (skip seq_cst clause, if it is first).
3870     if (C->getClauseKind() != OMPC_seq_cst) {
3871       Kind = C->getClauseKind();
3872       break;
3873     }
3874   }
3875 
3876   const auto *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
3877   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3878     enterFullExpression(EWC);
3879   }
3880   // Processing for statements under 'atomic capture'.
3881   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3882     for (const auto *C : Compound->body()) {
3883       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3884         enterFullExpression(EWC);
3885       }
3886     }
3887   }
3888 
3889   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3890                                             PrePostActionTy &) {
3891     CGF.EmitStopPoint(CS);
3892     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3893                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3894                       S.isXLHSInRHSPart(), S.getLocStart());
3895   };
3896   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3897   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3898 }
3899 
3900 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3901                                          const OMPExecutableDirective &S,
3902                                          const RegionCodeGenTy &CodeGen) {
3903   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3904   CodeGenModule &CGM = CGF.CGM;
3905 
3906   llvm::Function *Fn = nullptr;
3907   llvm::Constant *FnID = nullptr;
3908 
3909   const Expr *IfCond = nullptr;
3910   // Check for the at most one if clause associated with the target region.
3911   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3912     if (C->getNameModifier() == OMPD_unknown ||
3913         C->getNameModifier() == OMPD_target) {
3914       IfCond = C->getCondition();
3915       break;
3916     }
3917   }
3918 
3919   // Check if we have any device clause associated with the directive.
3920   const Expr *Device = nullptr;
3921   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3922     Device = C->getDevice();
3923   }
3924 
3925   // Check if we have an if clause whose conditional always evaluates to false
3926   // or if we do not have any targets specified. If so the target region is not
3927   // an offload entry point.
3928   bool IsOffloadEntry = true;
3929   if (IfCond) {
3930     bool Val;
3931     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3932       IsOffloadEntry = false;
3933   }
3934   if (CGM.getLangOpts().OMPTargetTriples.empty())
3935     IsOffloadEntry = false;
3936 
3937   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3938   StringRef ParentName;
3939   // In case we have Ctors/Dtors we use the complete type variant to produce
3940   // the mangling of the device outlined kernel.
3941   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3942     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3943   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3944     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3945   else
3946     ParentName =
3947         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3948 
3949   // Emit target region as a standalone region.
3950   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3951                                                     IsOffloadEntry, CodeGen);
3952   OMPLexicalScope Scope(CGF, S, OMPD_task);
3953   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
3954 }
3955 
3956 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3957                              PrePostActionTy &Action) {
3958   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3959   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3960   CGF.EmitOMPPrivateClause(S, PrivateScope);
3961   (void)PrivateScope.Privatize();
3962 
3963   Action.Enter(CGF);
3964   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
3965 }
3966 
3967 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3968                                                   StringRef ParentName,
3969                                                   const OMPTargetDirective &S) {
3970   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3971     emitTargetRegion(CGF, S, Action);
3972   };
3973   llvm::Function *Fn;
3974   llvm::Constant *Addr;
3975   // Emit target region as a standalone region.
3976   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3977       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3978   assert(Fn && Addr && "Target device function emission failed.");
3979 }
3980 
3981 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3982   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3983     emitTargetRegion(CGF, S, Action);
3984   };
3985   emitCommonOMPTargetDirective(*this, S, CodeGen);
3986 }
3987 
3988 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3989                                         const OMPExecutableDirective &S,
3990                                         OpenMPDirectiveKind InnermostKind,
3991                                         const RegionCodeGenTy &CodeGen) {
3992   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3993   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3994       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3995 
3996   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3997   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3998   if (NT || TL) {
3999     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
4000     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
4001 
4002     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
4003                                                   S.getLocStart());
4004   }
4005 
4006   OMPTeamsScope Scope(CGF, S);
4007   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4008   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4009   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
4010                                            CapturedVars);
4011 }
4012 
4013 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
4014   // Emit teams region as a standalone region.
4015   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4016     OMPPrivateScope PrivateScope(CGF);
4017     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4018     CGF.EmitOMPPrivateClause(S, PrivateScope);
4019     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4020     (void)PrivateScope.Privatize();
4021     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
4022     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4023   };
4024   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4025   emitPostUpdateForReductionClause(
4026       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4027 }
4028 
4029 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4030                                   const OMPTargetTeamsDirective &S) {
4031   auto *CS = S.getCapturedStmt(OMPD_teams);
4032   Action.Enter(CGF);
4033   // Emit teams region as a standalone region.
4034   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
4035     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4036     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4037     CGF.EmitOMPPrivateClause(S, PrivateScope);
4038     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4039     (void)PrivateScope.Privatize();
4040     Action.Enter(CGF);
4041     CGF.EmitStmt(CS->getCapturedStmt());
4042     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4043   };
4044   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
4045   emitPostUpdateForReductionClause(
4046       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4047 }
4048 
4049 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
4050     CodeGenModule &CGM, StringRef ParentName,
4051     const OMPTargetTeamsDirective &S) {
4052   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4053     emitTargetTeamsRegion(CGF, Action, S);
4054   };
4055   llvm::Function *Fn;
4056   llvm::Constant *Addr;
4057   // Emit target region as a standalone region.
4058   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4059       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4060   assert(Fn && Addr && "Target device function emission failed.");
4061 }
4062 
4063 void CodeGenFunction::EmitOMPTargetTeamsDirective(
4064     const OMPTargetTeamsDirective &S) {
4065   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4066     emitTargetTeamsRegion(CGF, Action, S);
4067   };
4068   emitCommonOMPTargetDirective(*this, S, CodeGen);
4069 }
4070 
4071 static void
4072 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4073                                 const OMPTargetTeamsDistributeDirective &S) {
4074   Action.Enter(CGF);
4075   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4076     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4077   };
4078 
4079   // Emit teams region as a standalone region.
4080   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4081                                             PrePostActionTy &) {
4082     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4083     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4084     (void)PrivateScope.Privatize();
4085     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4086                                                     CodeGenDistribute);
4087     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4088   };
4089   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
4090   emitPostUpdateForReductionClause(CGF, S,
4091                                    [](CodeGenFunction &) { return nullptr; });
4092 }
4093 
4094 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
4095     CodeGenModule &CGM, StringRef ParentName,
4096     const OMPTargetTeamsDistributeDirective &S) {
4097   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4098     emitTargetTeamsDistributeRegion(CGF, Action, S);
4099   };
4100   llvm::Function *Fn;
4101   llvm::Constant *Addr;
4102   // Emit target region as a standalone region.
4103   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4104       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4105   assert(Fn && Addr && "Target device function emission failed.");
4106 }
4107 
4108 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
4109     const OMPTargetTeamsDistributeDirective &S) {
4110   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4111     emitTargetTeamsDistributeRegion(CGF, Action, S);
4112   };
4113   emitCommonOMPTargetDirective(*this, S, CodeGen);
4114 }
4115 
4116 static void emitTargetTeamsDistributeSimdRegion(
4117     CodeGenFunction &CGF, PrePostActionTy &Action,
4118     const OMPTargetTeamsDistributeSimdDirective &S) {
4119   Action.Enter(CGF);
4120   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4121     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4122   };
4123 
4124   // Emit teams region as a standalone region.
4125   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4126                                             PrePostActionTy &) {
4127     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4128     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4129     (void)PrivateScope.Privatize();
4130     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4131                                                     CodeGenDistribute);
4132     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4133   };
4134   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
4135   emitPostUpdateForReductionClause(CGF, S,
4136                                    [](CodeGenFunction &) { return nullptr; });
4137 }
4138 
4139 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
4140     CodeGenModule &CGM, StringRef ParentName,
4141     const OMPTargetTeamsDistributeSimdDirective &S) {
4142   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4143     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4144   };
4145   llvm::Function *Fn;
4146   llvm::Constant *Addr;
4147   // Emit target region as a standalone region.
4148   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4149       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4150   assert(Fn && Addr && "Target device function emission failed.");
4151 }
4152 
4153 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
4154     const OMPTargetTeamsDistributeSimdDirective &S) {
4155   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4156     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4157   };
4158   emitCommonOMPTargetDirective(*this, S, CodeGen);
4159 }
4160 
4161 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
4162     const OMPTeamsDistributeDirective &S) {
4163 
4164   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4165     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4166   };
4167 
4168   // Emit teams region as a standalone region.
4169   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4170                                             PrePostActionTy &) {
4171     OMPPrivateScope PrivateScope(CGF);
4172     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4173     (void)PrivateScope.Privatize();
4174     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4175                                                     CodeGenDistribute);
4176     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4177   };
4178   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
4179   emitPostUpdateForReductionClause(*this, S,
4180                                    [](CodeGenFunction &) { return nullptr; });
4181 }
4182 
4183 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
4184     const OMPTeamsDistributeSimdDirective &S) {
4185   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4186     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4187   };
4188 
4189   // Emit teams region as a standalone region.
4190   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4191                                             PrePostActionTy &) {
4192     OMPPrivateScope PrivateScope(CGF);
4193     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4194     (void)PrivateScope.Privatize();
4195     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
4196                                                     CodeGenDistribute);
4197     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4198   };
4199   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
4200   emitPostUpdateForReductionClause(*this, S,
4201                                    [](CodeGenFunction &) { return nullptr; });
4202 }
4203 
4204 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
4205     const OMPTeamsDistributeParallelForDirective &S) {
4206   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4207     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4208                               S.getDistInc());
4209   };
4210 
4211   // Emit teams region as a standalone region.
4212   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4213                                             PrePostActionTy &) {
4214     OMPPrivateScope PrivateScope(CGF);
4215     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4216     (void)PrivateScope.Privatize();
4217     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
4218                                                     CodeGenDistribute);
4219     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4220   };
4221   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4222   emitPostUpdateForReductionClause(*this, S,
4223                                    [](CodeGenFunction &) { return nullptr; });
4224 }
4225 
4226 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
4227     const OMPTeamsDistributeParallelForSimdDirective &S) {
4228   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4229     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4230                               S.getDistInc());
4231   };
4232 
4233   // Emit teams region as a standalone region.
4234   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4235                                             PrePostActionTy &) {
4236     OMPPrivateScope PrivateScope(CGF);
4237     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4238     (void)PrivateScope.Privatize();
4239     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4240         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4241     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4242   };
4243   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
4244   emitPostUpdateForReductionClause(*this, S,
4245                                    [](CodeGenFunction &) { return nullptr; });
4246 }
4247 
4248 static void emitTargetTeamsDistributeParallelForRegion(
4249     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
4250     PrePostActionTy &Action) {
4251   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4252     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4253                               S.getDistInc());
4254   };
4255 
4256   // Emit teams region as a standalone region.
4257   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4258                                                  PrePostActionTy &) {
4259     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4260     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4261     (void)PrivateScope.Privatize();
4262     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4263         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4264     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4265   };
4266 
4267   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
4268                               CodeGenTeams);
4269   emitPostUpdateForReductionClause(CGF, S,
4270                                    [](CodeGenFunction &) { return nullptr; });
4271 }
4272 
4273 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
4274     CodeGenModule &CGM, StringRef ParentName,
4275     const OMPTargetTeamsDistributeParallelForDirective &S) {
4276   // Emit SPMD target teams distribute parallel for region as a standalone
4277   // region.
4278   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4279     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4280   };
4281   llvm::Function *Fn;
4282   llvm::Constant *Addr;
4283   // Emit target region as a standalone region.
4284   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4285       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4286   assert(Fn && Addr && "Target device function emission failed.");
4287 }
4288 
4289 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
4290     const OMPTargetTeamsDistributeParallelForDirective &S) {
4291   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4292     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4293   };
4294   emitCommonOMPTargetDirective(*this, S, CodeGen);
4295 }
4296 
4297 static void emitTargetTeamsDistributeParallelForSimdRegion(
4298     CodeGenFunction &CGF,
4299     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
4300     PrePostActionTy &Action) {
4301   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4302     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4303                               S.getDistInc());
4304   };
4305 
4306   // Emit teams region as a standalone region.
4307   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4308                                                  PrePostActionTy &) {
4309     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4310     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4311     (void)PrivateScope.Privatize();
4312     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4313         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4314     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4315   };
4316 
4317   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
4318                               CodeGenTeams);
4319   emitPostUpdateForReductionClause(CGF, S,
4320                                    [](CodeGenFunction &) { return nullptr; });
4321 }
4322 
4323 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
4324     CodeGenModule &CGM, StringRef ParentName,
4325     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4326   // Emit SPMD target teams distribute parallel for simd region as a standalone
4327   // region.
4328   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4329     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4330   };
4331   llvm::Function *Fn;
4332   llvm::Constant *Addr;
4333   // Emit target region as a standalone region.
4334   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4335       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4336   assert(Fn && Addr && "Target device function emission failed.");
4337 }
4338 
4339 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
4340     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4341   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4342     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4343   };
4344   emitCommonOMPTargetDirective(*this, S, CodeGen);
4345 }
4346 
4347 void CodeGenFunction::EmitOMPCancellationPointDirective(
4348     const OMPCancellationPointDirective &S) {
4349   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
4350                                                    S.getCancelRegion());
4351 }
4352 
4353 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
4354   const Expr *IfCond = nullptr;
4355   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4356     if (C->getNameModifier() == OMPD_unknown ||
4357         C->getNameModifier() == OMPD_cancel) {
4358       IfCond = C->getCondition();
4359       break;
4360     }
4361   }
4362   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
4363                                         S.getCancelRegion());
4364 }
4365 
4366 CodeGenFunction::JumpDest
4367 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
4368   if (Kind == OMPD_parallel || Kind == OMPD_task ||
4369       Kind == OMPD_target_parallel)
4370     return ReturnBlock;
4371   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
4372          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
4373          Kind == OMPD_distribute_parallel_for ||
4374          Kind == OMPD_target_parallel_for ||
4375          Kind == OMPD_teams_distribute_parallel_for ||
4376          Kind == OMPD_target_teams_distribute_parallel_for);
4377   return OMPCancelStack.getExitBlock();
4378 }
4379 
4380 void CodeGenFunction::EmitOMPUseDevicePtrClause(
4381     const OMPClause &NC, OMPPrivateScope &PrivateScope,
4382     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
4383   const auto &C = cast<OMPUseDevicePtrClause>(NC);
4384   auto OrigVarIt = C.varlist_begin();
4385   auto InitIt = C.inits().begin();
4386   for (auto PvtVarIt : C.private_copies()) {
4387     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
4388     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
4389     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
4390 
4391     // In order to identify the right initializer we need to match the
4392     // declaration used by the mapping logic. In some cases we may get
4393     // OMPCapturedExprDecl that refers to the original declaration.
4394     const ValueDecl *MatchingVD = OrigVD;
4395     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
4396       // OMPCapturedExprDecl are used to privative fields of the current
4397       // structure.
4398       auto *ME = cast<MemberExpr>(OED->getInit());
4399       assert(isa<CXXThisExpr>(ME->getBase()) &&
4400              "Base should be the current struct!");
4401       MatchingVD = ME->getMemberDecl();
4402     }
4403 
4404     // If we don't have information about the current list item, move on to
4405     // the next one.
4406     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
4407     if (InitAddrIt == CaptureDeviceAddrMap.end())
4408       continue;
4409 
4410     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
4411       // Initialize the temporary initialization variable with the address we
4412       // get from the runtime library. We have to cast the source address
4413       // because it is always a void *. References are materialized in the
4414       // privatization scope, so the initialization here disregards the fact
4415       // the original variable is a reference.
4416       QualType AddrQTy =
4417           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
4418       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
4419       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
4420       setAddrOfLocalVar(InitVD, InitAddr);
4421 
4422       // Emit private declaration, it will be initialized by the value we
4423       // declaration we just added to the local declarations map.
4424       EmitDecl(*PvtVD);
4425 
4426       // The initialization variables reached its purpose in the emission
4427       // ofthe previous declaration, so we don't need it anymore.
4428       LocalDeclMap.erase(InitVD);
4429 
4430       // Return the address of the private variable.
4431       return GetAddrOfLocalVar(PvtVD);
4432     });
4433     assert(IsRegistered && "firstprivate var already registered as private");
4434     // Silence the warning about unused variable.
4435     (void)IsRegistered;
4436 
4437     ++OrigVarIt;
4438     ++InitIt;
4439   }
4440 }
4441 
// Generate the instructions for '#pragma omp target data' directive.
//
// The region body is always emitted inline. When offload targets are
// configured, the OpenMP runtime codegen wraps it with the data mapping calls
// and, through the pre-action installed below, the list items of any
// use_device_ptr clauses are privatized inside the region.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    // Flag owned by the enclosing function; set when the action is entered.
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  // Codegen for the whole region; Action is whatever pre/post action is
  // attached to the region when it is run.
  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emits the statement captured by the innermost captured region.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      // Reset the flag so that only the Enter call below can turn it on.
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        // Emit the body while the private scope is still alive.
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  // Note that PrivAction is not attached on this path, so the region runs
  // without the privatization pre-action.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
4530 
4531 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4532     const OMPTargetEnterDataDirective &S) {
4533   // If we don't have target devices, don't bother emitting the data mapping
4534   // code.
4535   if (CGM.getLangOpts().OMPTargetTriples.empty())
4536     return;
4537 
4538   // Check if we have any if clause associated with the directive.
4539   const Expr *IfCond = nullptr;
4540   if (auto *C = S.getSingleClause<OMPIfClause>())
4541     IfCond = C->getCondition();
4542 
4543   // Check if we have any device clause associated with the directive.
4544   const Expr *Device = nullptr;
4545   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4546     Device = C->getDevice();
4547 
4548   OMPLexicalScope Scope(*this, S, OMPD_task);
4549   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4550 }
4551 
4552 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4553     const OMPTargetExitDataDirective &S) {
4554   // If we don't have target devices, don't bother emitting the data mapping
4555   // code.
4556   if (CGM.getLangOpts().OMPTargetTriples.empty())
4557     return;
4558 
4559   // Check if we have any if clause associated with the directive.
4560   const Expr *IfCond = nullptr;
4561   if (auto *C = S.getSingleClause<OMPIfClause>())
4562     IfCond = C->getCondition();
4563 
4564   // Check if we have any device clause associated with the directive.
4565   const Expr *Device = nullptr;
4566   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4567     Device = C->getDevice();
4568 
4569   OMPLexicalScope Scope(*this, S, OMPD_task);
4570   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4571 }
4572 
4573 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4574                                      const OMPTargetParallelDirective &S,
4575                                      PrePostActionTy &Action) {
4576   // Get the captured statement associated with the 'parallel' region.
4577   auto *CS = S.getCapturedStmt(OMPD_parallel);
4578   Action.Enter(CGF);
4579   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4580     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4581     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4582     CGF.EmitOMPPrivateClause(S, PrivateScope);
4583     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4584     (void)PrivateScope.Privatize();
4585     // TODO: Add support for clauses.
4586     CGF.EmitStmt(CS->getCapturedStmt());
4587     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4588   };
4589   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4590                                  emitEmptyBoundParameters);
4591   emitPostUpdateForReductionClause(
4592       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4593 }
4594 
4595 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4596     CodeGenModule &CGM, StringRef ParentName,
4597     const OMPTargetParallelDirective &S) {
4598   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4599     emitTargetParallelRegion(CGF, S, Action);
4600   };
4601   llvm::Function *Fn;
4602   llvm::Constant *Addr;
4603   // Emit target region as a standalone region.
4604   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4605       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4606   assert(Fn && Addr && "Target device function emission failed.");
4607 }
4608 
4609 void CodeGenFunction::EmitOMPTargetParallelDirective(
4610     const OMPTargetParallelDirective &S) {
4611   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4612     emitTargetParallelRegion(CGF, S, Action);
4613   };
4614   emitCommonOMPTargetDirective(*this, S, CodeGen);
4615 }
4616 
4617 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4618                                         const OMPTargetParallelForDirective &S,
4619                                         PrePostActionTy &Action) {
4620   Action.Enter(CGF);
4621   // Emit directive as a combined directive that consists of two implicit
4622   // directives: 'parallel' with 'for' directive.
4623   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4624     CodeGenFunction::OMPCancelStackRAII CancelRegion(
4625         CGF, OMPD_target_parallel_for, S.hasCancel());
4626     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4627                                emitDispatchForLoopBounds);
4628   };
4629   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4630                                  emitEmptyBoundParameters);
4631 }
4632 
4633 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4634     CodeGenModule &CGM, StringRef ParentName,
4635     const OMPTargetParallelForDirective &S) {
4636   // Emit SPMD target parallel for region as a standalone region.
4637   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4638     emitTargetParallelForRegion(CGF, S, Action);
4639   };
4640   llvm::Function *Fn;
4641   llvm::Constant *Addr;
4642   // Emit target region as a standalone region.
4643   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4644       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4645   assert(Fn && Addr && "Target device function emission failed.");
4646 }
4647 
4648 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4649     const OMPTargetParallelForDirective &S) {
4650   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4651     emitTargetParallelForRegion(CGF, S, Action);
4652   };
4653   emitCommonOMPTargetDirective(*this, S, CodeGen);
4654 }
4655 
4656 static void
4657 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4658                                 const OMPTargetParallelForSimdDirective &S,
4659                                 PrePostActionTy &Action) {
4660   Action.Enter(CGF);
4661   // Emit directive as a combined directive that consists of two implicit
4662   // directives: 'parallel' with 'for' directive.
4663   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4664     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4665                                emitDispatchForLoopBounds);
4666   };
4667   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4668                                  emitEmptyBoundParameters);
4669 }
4670 
4671 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4672     CodeGenModule &CGM, StringRef ParentName,
4673     const OMPTargetParallelForSimdDirective &S) {
4674   // Emit SPMD target parallel for region as a standalone region.
4675   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4676     emitTargetParallelForSimdRegion(CGF, S, Action);
4677   };
4678   llvm::Function *Fn;
4679   llvm::Constant *Addr;
4680   // Emit target region as a standalone region.
4681   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4682       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4683   assert(Fn && Addr && "Target device function emission failed.");
4684 }
4685 
4686 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4687     const OMPTargetParallelForSimdDirective &S) {
4688   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4689     emitTargetParallelForSimdRegion(CGF, S, Action);
4690   };
4691   emitCommonOMPTargetDirective(*this, S, CodeGen);
4692 }
4693 
4694 /// Emit a helper variable and return corresponding lvalue.
4695 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4696                      const ImplicitParamDecl *PVD,
4697                      CodeGenFunction::OMPPrivateScope &Privates) {
4698   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4699   Privates.addPrivate(
4700       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4701 }
4702 
/// Shared emission path for taskloop-based loop directives.
///
/// Gathers the applicable 'if' condition and the nogroup / grainsize /
/// num_tasks settings into an OMPTaskDataTy, builds two generators (one for
/// the loop body, one for the runtime task-loop call) and passes them to
/// EmitOMPTaskBasedDirective. Unless a 'nogroup' clause is present, that
/// emission is wrapped in a taskgroup region.
///
/// \param S Loop directive to emit; its kind must satisfy
///          isOpenMPTaskLoopDirective (asserted).
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Use the first 'if' clause that either has no directive-name modifier or
  // names 'taskloop'; IfCond stays null when there is no such clause.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop
  // The int flag of Data.Schedule tells the two clauses apart (false for
  // grainsize, true for num_tasks); the pointer holds the emitted clause
  // value. Data.Schedule is left untouched when neither clause is present.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Loop-body generator handed to EmitOMPTaskBasedDirective below.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null unless a runtime precondition check is emitted below.
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      // Precondition folds to false: emit nothing for this loop.
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // Positions, within the captured declaration's parameter list, of the
    // implicit parameters used for the lower bound, upper bound, stride and
    // last-iteration flag (lower bound at index 5, the rest following it).
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    // Map the directive's helper variables onto those parameters.
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    // Return value of Privatize() is deliberately ignored.
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Inner loop: the first callback emits the loop body followed by a stop
    // point for the directive; the second callback is intentionally empty.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    // The flag is loaded from the last-iteration parameter (*LIP).
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Task generator: emits the runtime task-loop call for the given outlined
  // function inside an inlined 'taskloop' directive region.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  // With 'nogroup' the task is emitted directly; otherwise the same emission
  // is performed from within a taskgroup region.
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getLocStart());
  }
}
4842 
/// Emit code for an OMPTaskLoopDirective.
///
/// Forwards \p S unchanged to EmitOMPTaskLoopBasedDirective, which contains
/// the actual emission logic.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4846 
/// Emit code for an OMPTaskLoopSimdDirective.
///
/// Forwards \p S unchanged to EmitOMPTaskLoopBasedDirective, which contains
/// the actual emission logic.
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4851 
4852 // Generate the instructions for '#pragma omp target update' directive.
4853 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4854     const OMPTargetUpdateDirective &S) {
4855   // If we don't have target devices, don't bother emitting the data mapping
4856   // code.
4857   if (CGM.getLangOpts().OMPTargetTriples.empty())
4858     return;
4859 
4860   // Check if we have any if clause associated with the directive.
4861   const Expr *IfCond = nullptr;
4862   if (auto *C = S.getSingleClause<OMPIfClause>())
4863     IfCond = C->getCondition();
4864 
4865   // Check if we have any device clause associated with the directive.
4866   const Expr *Device = nullptr;
4867   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4868     Device = C->getDevice();
4869 
4870   OMPLexicalScope Scope(*this, S, OMPD_task);
4871   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4872 }
4873 
4874 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
4875     const OMPExecutableDirective &D) {
4876   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
4877     return;
4878   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
4879     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
4880       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
4881     } else {
4882       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
4883         for (const auto *E : LD->counters()) {
4884           if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
4885                   cast<DeclRefExpr>(E)->getDecl())) {
4886             // Emit only those that were not explicitly referenced in clauses.
4887             if (!CGF.LocalDeclMap.count(VD))
4888               CGF.EmitVarDecl(*VD);
4889           }
4890         }
4891       }
4892       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
4893     }
4894   };
4895   OMPSimdLexicalScope Scope(*this, D);
4896   CGM.getOpenMPRuntime().emitInlinedDirective(
4897       *this,
4898       isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
4899                                                   : D.getDirectiveKind(),
4900       CodeGen);
4901 }
4902