1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             assert(VD == VD->getCanonicalDecl() &&
69                         "Canonical decl must be captured.");
70             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
71                             isCapturedVar(CGF, VD) ||
72                                 (CGF.CapturedStmtInfo &&
73                                  InlinedShareds.isGlobalVarCaptured(VD)),
74                             VD->getType().getNonReferenceType(), VK_LValue,
75                             SourceLocation());
76             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
77               return CGF.EmitLValue(&DRE).getAddress();
78             });
79           }
80         }
81         (void)InlinedShareds.Privatize();
82       }
83     }
84   }
85 };
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S,
100                         /*AsInlined=*/false,
101                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S,
116                         /*AsInlined=*/false,
117                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 };
119 
120 /// Private scope for OpenMP loop-based directives, that supports capturing
121 /// of used expression from loop statement.
122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
123   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
124     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
125       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
126         for (const auto *I : PreInits->decls())
127           CGF.EmitVarDecl(cast<VarDecl>(*I));
128       }
129     }
130   }
131 
132 public:
133   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
134       : CodeGenFunction::RunCleanupsScope(CGF) {
135     emitPreInitStmt(CGF, S);
136   }
137 };
138 
139 } // namespace
140 
141 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
142   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
143     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
144       OrigVD = OrigVD->getCanonicalDecl();
145       bool IsCaptured =
146           LambdaCaptureFields.lookup(OrigVD) ||
147           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
148           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
149       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
150                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
151       return EmitLValue(&DRE);
152     }
153   }
154   return EmitLValue(E);
155 }
156 
157 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
158   auto &C = getContext();
159   llvm::Value *Size = nullptr;
160   auto SizeInChars = C.getTypeSizeInChars(Ty);
161   if (SizeInChars.isZero()) {
162     // getTypeSizeInChars() returns 0 for a VLA.
163     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
164       llvm::Value *ArraySize;
165       std::tie(ArraySize, Ty) = getVLASize(VAT);
166       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
167     }
168     SizeInChars = C.getTypeSizeInChars(Ty);
169     if (SizeInChars.isZero())
170       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
171     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
172   } else
173     Size = CGM.getSize(SizeInChars);
174   return Size;
175 }
176 
177 void CodeGenFunction::GenerateOpenMPCapturedVars(
178     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
179   const RecordDecl *RD = S.getCapturedRecordDecl();
180   auto CurField = RD->field_begin();
181   auto CurCap = S.captures().begin();
182   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
183                                                  E = S.capture_init_end();
184        I != E; ++I, ++CurField, ++CurCap) {
185     if (CurField->hasCapturedVLAType()) {
186       auto VAT = CurField->getCapturedVLAType();
187       auto *Val = VLASizeMap[VAT->getSizeExpr()];
188       CapturedVars.push_back(Val);
189     } else if (CurCap->capturesThis())
190       CapturedVars.push_back(CXXThisValue);
191     else if (CurCap->capturesVariableByCopy()) {
192       llvm::Value *CV =
193           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
194 
195       // If the field is not a pointer, we need to save the actual value
196       // and load it as a void pointer.
197       if (!CurField->getType()->isAnyPointerType()) {
198         auto &Ctx = getContext();
199         auto DstAddr = CreateMemTemp(
200             Ctx.getUIntPtrType(),
201             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
202         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
203 
204         auto *SrcAddrVal = EmitScalarConversion(
205             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
206             Ctx.getPointerType(CurField->getType()), SourceLocation());
207         LValue SrcLV =
208             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
209 
210         // Store the value using the source type pointer.
211         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
212 
213         // Load the value using the destination type pointer.
214         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
215       }
216       CapturedVars.push_back(CV);
217     } else {
218       assert(CurCap->capturesVariable() && "Expected capture by reference.");
219       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
220     }
221   }
222 }
223 
224 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
225                                     StringRef Name, LValue AddrLV,
226                                     bool isReferenceType = false) {
227   ASTContext &Ctx = CGF.getContext();
228 
229   auto *CastedPtr = CGF.EmitScalarConversion(
230       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
231       Ctx.getPointerType(DstType), SourceLocation());
232   auto TmpAddr =
233       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
234           .getAddress();
235 
236   // If we are dealing with references we need to return the address of the
237   // reference instead of the reference of the value.
238   if (isReferenceType) {
239     QualType RefType = Ctx.getLValueReferenceType(DstType);
240     auto *RefVal = TmpAddr.getPointer();
241     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
242     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
243     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
244   }
245 
246   return TmpAddr;
247 }
248 
249 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
250   if (T->isLValueReferenceType()) {
251     return C.getLValueReferenceType(
252         getCanonicalParamType(C, T.getNonReferenceType()),
253         /*SpelledAsLValue=*/false);
254   }
255   if (T->isPointerType())
256     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
257   if (auto *A = T->getAsArrayTypeUnsafe()) {
258     if (auto *VLA = dyn_cast<VariableArrayType>(A))
259       return getCanonicalParamType(C, VLA->getElementType());
260     else if (!A->isVariablyModifiedType())
261       return C.getCanonicalType(T);
262   }
263   return C.getCanonicalParamType(T);
264 }
265 
266 namespace {
267   /// Contains required data for proper outlined function codegen.
268   struct FunctionOptions {
269     /// Captured statement for which the function is generated.
270     const CapturedStmt *S = nullptr;
271     /// true if cast to/from  UIntPtr is required for variables captured by
272     /// value.
273     const bool UIntPtrCastRequired = true;
274     /// true if only casted arguments must be registered as local args or VLA
275     /// sizes.
276     const bool RegisterCastedArgsOnly = false;
277     /// Name of the generated function.
278     const StringRef FunctionName;
279     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
280                              bool RegisterCastedArgsOnly,
281                              StringRef FunctionName)
282         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
283           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
284           FunctionName(FunctionName) {}
285   };
286 }
287 
288 static llvm::Function *emitOutlinedFunctionPrologue(
289     CodeGenFunction &CGF, FunctionArgList &Args,
290     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
291         &LocalAddrs,
292     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
293         &VLASizes,
294     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
295   const CapturedDecl *CD = FO.S->getCapturedDecl();
296   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
297   assert(CD->hasBody() && "missing CapturedDecl body");
298 
299   CXXThisValue = nullptr;
300   // Build the argument list.
301   CodeGenModule &CGM = CGF.CGM;
302   ASTContext &Ctx = CGM.getContext();
303   FunctionArgList TargetArgs;
304   Args.append(CD->param_begin(),
305               std::next(CD->param_begin(), CD->getContextParamPosition()));
306   TargetArgs.append(
307       CD->param_begin(),
308       std::next(CD->param_begin(), CD->getContextParamPosition()));
309   auto I = FO.S->captures().begin();
310   for (auto *FD : RD->fields()) {
311     QualType ArgType = FD->getType();
312     IdentifierInfo *II = nullptr;
313     VarDecl *CapVar = nullptr;
314 
315     // If this is a capture by copy and the type is not a pointer, the outlined
316     // function argument type should be uintptr and the value properly casted to
317     // uintptr. This is necessary given that the runtime library is only able to
318     // deal with pointers. We can pass in the same way the VLA type sizes to the
319     // outlined function.
320     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
321         I->capturesVariableArrayType()) {
322       if (FO.UIntPtrCastRequired)
323         ArgType = Ctx.getUIntPtrType();
324     }
325 
326     if (I->capturesVariable() || I->capturesVariableByCopy()) {
327       CapVar = I->getCapturedVar();
328       II = CapVar->getIdentifier();
329     } else if (I->capturesThis())
330       II = &Ctx.Idents.get("this");
331     else {
332       assert(I->capturesVariableArrayType());
333       II = &Ctx.Idents.get("vla");
334     }
335     if (ArgType->isVariablyModifiedType())
336       ArgType = getCanonicalParamType(Ctx, ArgType);
337     auto *Arg =
338         ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
339                                   ArgType, ImplicitParamDecl::Other);
340     Args.emplace_back(Arg);
341     // Do not cast arguments if we emit function with non-original types.
342     TargetArgs.emplace_back(
343         FO.UIntPtrCastRequired
344             ? Arg
345             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
346     ++I;
347   }
348   Args.append(
349       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
350       CD->param_end());
351   TargetArgs.append(
352       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
353       CD->param_end());
354 
355   // Create the function declaration.
356   FunctionType::ExtInfo ExtInfo;
357   const CGFunctionInfo &FuncInfo =
358       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
359   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
360 
361   llvm::Function *F =
362       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
363                              FO.FunctionName, &CGM.getModule());
364   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
365   if (CD->isNothrow())
366     F->setDoesNotThrow();
367 
368   // Generate the function.
369   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
370                     FO.S->getLocStart(), CD->getBody()->getLocStart());
371   unsigned Cnt = CD->getContextParamPosition();
372   I = FO.S->captures().begin();
373   for (auto *FD : RD->fields()) {
374     // Do not map arguments if we emit function with non-original types.
375     Address LocalAddr(Address::invalid());
376     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
377       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
378                                                              TargetArgs[Cnt]);
379     } else {
380       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
381     }
382     // If we are capturing a pointer by copy we don't need to do anything, just
383     // use the value that we get from the arguments.
384     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
385       const VarDecl *CurVD = I->getCapturedVar();
386       // If the variable is a reference we need to materialize it here.
387       if (CurVD->getType()->isReferenceType()) {
388         Address RefAddr = CGF.CreateMemTemp(
389             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
390         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
391                               /*Volatile=*/false, CurVD->getType());
392         LocalAddr = RefAddr;
393       }
394       if (!FO.RegisterCastedArgsOnly)
395         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
396       ++Cnt;
397       ++I;
398       continue;
399     }
400 
401     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
402                                         AlignmentSource::Decl);
403     if (FD->hasCapturedVLAType()) {
404       if (FO.UIntPtrCastRequired) {
405         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
406                                                           Args[Cnt]->getName(),
407                                                           ArgLVal),
408                                      FD->getType(), AlignmentSource::Decl);
409       }
410       auto *ExprArg =
411           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
412       auto VAT = FD->getCapturedVLAType();
413       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
414     } else if (I->capturesVariable()) {
415       auto *Var = I->getCapturedVar();
416       QualType VarTy = Var->getType();
417       Address ArgAddr = ArgLVal.getAddress();
418       if (!VarTy->isReferenceType()) {
419         if (ArgLVal.getType()->isLValueReferenceType()) {
420           ArgAddr = CGF.EmitLoadOfReference(
421               ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
422         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
423           assert(ArgLVal.getType()->isPointerType());
424           ArgAddr = CGF.EmitLoadOfPointer(
425               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
426         }
427       }
428       if (!FO.RegisterCastedArgsOnly) {
429         LocalAddrs.insert(
430             {Args[Cnt],
431              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
432       }
433     } else if (I->capturesVariableByCopy()) {
434       assert(!FD->getType()->isAnyPointerType() &&
435              "Not expecting a captured pointer.");
436       auto *Var = I->getCapturedVar();
437       QualType VarTy = Var->getType();
438       LocalAddrs.insert(
439           {Args[Cnt],
440            {Var,
441             FO.UIntPtrCastRequired
442                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
443                                        ArgLVal, VarTy->isReferenceType())
444                 : ArgLVal.getAddress()}});
445     } else {
446       // If 'this' is captured, load it into CXXThisValue.
447       assert(I->capturesThis());
448       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
449                          .getScalarVal();
450       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
451     }
452     ++Cnt;
453     ++I;
454   }
455 
456   return F;
457 }
458 
459 llvm::Function *
460 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
461   assert(
462       CapturedStmtInfo &&
463       "CapturedStmtInfo should be set when generating the captured function");
464   const CapturedDecl *CD = S.getCapturedDecl();
465   // Build the argument list.
466   bool NeedWrapperFunction =
467       getDebugInfo() &&
468       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
469   FunctionArgList Args;
470   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
471   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
472   SmallString<256> Buffer;
473   llvm::raw_svector_ostream Out(Buffer);
474   Out << CapturedStmtInfo->getHelperName();
475   if (NeedWrapperFunction)
476     Out << "_debug__";
477   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
478                      Out.str());
479   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
480                                                    VLASizes, CXXThisValue, FO);
481   for (const auto &LocalAddrPair : LocalAddrs) {
482     if (LocalAddrPair.second.first) {
483       setAddrOfLocalVar(LocalAddrPair.second.first,
484                         LocalAddrPair.second.second);
485     }
486   }
487   for (const auto &VLASizePair : VLASizes)
488     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
489   PGO.assignRegionCounters(GlobalDecl(CD), F);
490   CapturedStmtInfo->EmitBody(*this, CD->getBody());
491   FinishFunction(CD->getBodyRBrace());
492   if (!NeedWrapperFunction)
493     return F;
494 
495   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
496                             /*RegisterCastedArgsOnly=*/true,
497                             CapturedStmtInfo->getHelperName());
498   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
499   Args.clear();
500   LocalAddrs.clear();
501   VLASizes.clear();
502   llvm::Function *WrapperF =
503       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
504                                    WrapperCGF.CXXThisValue, WrapperFO);
505   llvm::SmallVector<llvm::Value *, 4> CallArgs;
506   for (const auto *Arg : Args) {
507     llvm::Value *CallArg;
508     auto I = LocalAddrs.find(Arg);
509     if (I != LocalAddrs.end()) {
510       LValue LV = WrapperCGF.MakeAddrLValue(
511           I->second.second,
512           I->second.first ? I->second.first->getType() : Arg->getType(),
513           AlignmentSource::Decl);
514       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
515     } else {
516       auto EI = VLASizes.find(Arg);
517       if (EI != VLASizes.end())
518         CallArg = EI->second.second;
519       else {
520         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
521                                               Arg->getType(),
522                                               AlignmentSource::Decl);
523         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
524       }
525     }
526     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
527   }
528   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
529                                                   F, CallArgs);
530   WrapperCGF.FinishFunction();
531   return WrapperF;
532 }
533 
534 //===----------------------------------------------------------------------===//
535 //                              OpenMP Directive Emission
536 //===----------------------------------------------------------------------===//
537 void CodeGenFunction::EmitOMPAggregateAssign(
538     Address DestAddr, Address SrcAddr, QualType OriginalType,
539     const llvm::function_ref<void(Address, Address)> &CopyGen) {
540   // Perform element-by-element initialization.
541   QualType ElementTy;
542 
543   // Drill down to the base element type on both arrays.
544   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
545   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
546   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
547 
548   auto SrcBegin = SrcAddr.getPointer();
549   auto DestBegin = DestAddr.getPointer();
550   // Cast from pointer to array type to pointer to single element.
551   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
552   // The basic structure here is a while-do loop.
553   auto BodyBB = createBasicBlock("omp.arraycpy.body");
554   auto DoneBB = createBasicBlock("omp.arraycpy.done");
555   auto IsEmpty =
556       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
557   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
558 
559   // Enter the loop body, making that address the current address.
560   auto EntryBB = Builder.GetInsertBlock();
561   EmitBlock(BodyBB);
562 
563   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
564 
565   llvm::PHINode *SrcElementPHI =
566     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
567   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
568   Address SrcElementCurrent =
569       Address(SrcElementPHI,
570               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
571 
572   llvm::PHINode *DestElementPHI =
573     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
574   DestElementPHI->addIncoming(DestBegin, EntryBB);
575   Address DestElementCurrent =
576     Address(DestElementPHI,
577             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
578 
579   // Emit copy.
580   CopyGen(DestElementCurrent, SrcElementCurrent);
581 
582   // Shift the address forward by one element.
583   auto DestElementNext = Builder.CreateConstGEP1_32(
584       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
585   auto SrcElementNext = Builder.CreateConstGEP1_32(
586       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
587   // Check whether we've reached the end.
588   auto Done =
589       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
590   Builder.CreateCondBr(Done, DoneBB, BodyBB);
591   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
592   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
593 
594   // Done.
595   EmitBlock(DoneBB, /*IsFinished=*/true);
596 }
597 
598 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
599                                   Address SrcAddr, const VarDecl *DestVD,
600                                   const VarDecl *SrcVD, const Expr *Copy) {
601   if (OriginalType->isArrayType()) {
602     auto *BO = dyn_cast<BinaryOperator>(Copy);
603     if (BO && BO->getOpcode() == BO_Assign) {
604       // Perform simple memcpy for simple copying.
605       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
606     } else {
607       // For arrays with complex element types perform element by element
608       // copying.
609       EmitOMPAggregateAssign(
610           DestAddr, SrcAddr, OriginalType,
611           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
612             // Working with the single array element, so have to remap
613             // destination and source variables to corresponding array
614             // elements.
615             CodeGenFunction::OMPPrivateScope Remap(*this);
616             Remap.addPrivate(DestVD, [DestElement]() -> Address {
617               return DestElement;
618             });
619             Remap.addPrivate(
620                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
621             (void)Remap.Privatize();
622             EmitIgnoredExpr(Copy);
623           });
624     }
625   } else {
626     // Remap pseudo source variable to private copy.
627     CodeGenFunction::OMPPrivateScope Remap(*this);
628     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
629     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
630     (void)Remap.Privatize();
631     // Emit copying of the whole variable.
632     EmitIgnoredExpr(Copy);
633   }
634 }
635 
636 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
637                                                 OMPPrivateScope &PrivateScope) {
638   if (!HaveInsertPoint())
639     return false;
640   bool FirstprivateIsLastprivate = false;
641   llvm::DenseSet<const VarDecl *> Lastprivates;
642   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
643     for (const auto *D : C->varlists())
644       Lastprivates.insert(
645           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
646   }
647   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
648   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
649   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
650     auto IRef = C->varlist_begin();
651     auto InitsRef = C->inits().begin();
652     for (auto IInit : C->private_copies()) {
653       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
654       bool ThisFirstprivateIsLastprivate =
655           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
656       auto *CapFD = CapturesInfo.lookup(OrigVD);
657       auto *FD = CapturedStmtInfo->lookup(OrigVD);
658       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
659           !FD->getType()->isReferenceType()) {
660         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
661         ++IRef;
662         ++InitsRef;
663         continue;
664       }
665       FirstprivateIsLastprivate =
666           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
667       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
668         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
669         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
670         bool IsRegistered;
671         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
672                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
673                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
674         Address OriginalAddr = EmitLValue(&DRE).getAddress();
675         QualType Type = VD->getType();
676         if (Type->isArrayType()) {
677           // Emit VarDecl with copy init for arrays.
678           // Get the address of the original variable captured in current
679           // captured region.
680           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
681             auto Emission = EmitAutoVarAlloca(*VD);
682             auto *Init = VD->getInit();
683             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
684               // Perform simple memcpy.
685               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
686                                   Type);
687             } else {
688               EmitOMPAggregateAssign(
689                   Emission.getAllocatedAddress(), OriginalAddr, Type,
690                   [this, VDInit, Init](Address DestElement,
691                                        Address SrcElement) {
692                     // Clean up any temporaries needed by the initialization.
693                     RunCleanupsScope InitScope(*this);
694                     // Emit initialization for single element.
695                     setAddrOfLocalVar(VDInit, SrcElement);
696                     EmitAnyExprToMem(Init, DestElement,
697                                      Init->getType().getQualifiers(),
698                                      /*IsInitializer*/ false);
699                     LocalDeclMap.erase(VDInit);
700                   });
701             }
702             EmitAutoVarCleanups(Emission);
703             return Emission.getAllocatedAddress();
704           });
705         } else {
706           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
707             // Emit private VarDecl with copy init.
708             // Remap temp VDInit variable to the address of the original
709             // variable
710             // (for proper handling of captured global variables).
711             setAddrOfLocalVar(VDInit, OriginalAddr);
712             EmitDecl(*VD);
713             LocalDeclMap.erase(VDInit);
714             return GetAddrOfLocalVar(VD);
715           });
716         }
717         assert(IsRegistered &&
718                "firstprivate var already registered as private");
719         // Silence the warning about unused variable.
720         (void)IsRegistered;
721       }
722       ++IRef;
723       ++InitsRef;
724     }
725   }
726   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
727 }
728 
729 void CodeGenFunction::EmitOMPPrivateClause(
730     const OMPExecutableDirective &D,
731     CodeGenFunction::OMPPrivateScope &PrivateScope) {
732   if (!HaveInsertPoint())
733     return;
734   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
735   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
736     auto IRef = C->varlist_begin();
737     for (auto IInit : C->private_copies()) {
738       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
739       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
740         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
741         bool IsRegistered =
742             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
743               // Emit private VarDecl with copy init.
744               EmitDecl(*VD);
745               return GetAddrOfLocalVar(VD);
746             });
747         assert(IsRegistered && "private var already registered as private");
748         // Silence the warning about unused variable.
749         (void)IsRegistered;
750       }
751       ++IRef;
752     }
753   }
754 }
755 
756 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
757   if (!HaveInsertPoint())
758     return false;
759   // threadprivate_var1 = master_threadprivate_var1;
760   // operator=(threadprivate_var2, master_threadprivate_var2);
761   // ...
762   // __kmpc_barrier(&loc, global_tid);
763   llvm::DenseSet<const VarDecl *> CopiedVars;
764   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
765   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
766     auto IRef = C->varlist_begin();
767     auto ISrcRef = C->source_exprs().begin();
768     auto IDestRef = C->destination_exprs().begin();
769     for (auto *AssignOp : C->assignment_ops()) {
770       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
771       QualType Type = VD->getType();
772       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
773         // Get the address of the master variable. If we are emitting code with
774         // TLS support, the address is passed from the master as field in the
775         // captured declaration.
776         Address MasterAddr = Address::invalid();
777         if (getLangOpts().OpenMPUseTLS &&
778             getContext().getTargetInfo().isTLSSupported()) {
779           assert(CapturedStmtInfo->lookup(VD) &&
780                  "Copyin threadprivates should have been captured!");
781           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
782                           VK_LValue, (*IRef)->getExprLoc());
783           MasterAddr = EmitLValue(&DRE).getAddress();
784           LocalDeclMap.erase(VD);
785         } else {
786           MasterAddr =
787             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
788                                         : CGM.GetAddrOfGlobal(VD),
789                     getContext().getDeclAlign(VD));
790         }
791         // Get the address of the threadprivate variable.
792         Address PrivateAddr = EmitLValue(*IRef).getAddress();
793         if (CopiedVars.size() == 1) {
794           // At first check if current thread is a master thread. If it is, no
795           // need to copy data.
796           CopyBegin = createBasicBlock("copyin.not.master");
797           CopyEnd = createBasicBlock("copyin.not.master.end");
798           Builder.CreateCondBr(
799               Builder.CreateICmpNE(
800                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
801                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
802               CopyBegin, CopyEnd);
803           EmitBlock(CopyBegin);
804         }
805         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
806         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
807         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
808       }
809       ++IRef;
810       ++ISrcRef;
811       ++IDestRef;
812     }
813   }
814   if (CopyEnd) {
815     // Exit out of copying procedure for non-master thread.
816     EmitBlock(CopyEnd, /*IsFinished=*/true);
817     return true;
818   }
819   return false;
820 }
821 
822 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
823     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
824   if (!HaveInsertPoint())
825     return false;
826   bool HasAtLeastOneLastprivate = false;
827   llvm::DenseSet<const VarDecl *> SIMDLCVs;
828   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
829     auto *LoopDirective = cast<OMPLoopDirective>(&D);
830     for (auto *C : LoopDirective->counters()) {
831       SIMDLCVs.insert(
832           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
833     }
834   }
835   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
836   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
837     HasAtLeastOneLastprivate = true;
838     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
839       break;
840     auto IRef = C->varlist_begin();
841     auto IDestRef = C->destination_exprs().begin();
842     for (auto *IInit : C->private_copies()) {
843       // Keep the address of the original variable for future update at the end
844       // of the loop.
845       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
846       // Taskloops do not require additional initialization, it is done in
847       // runtime support library.
848       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
849         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
850         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
851           DeclRefExpr DRE(
852               const_cast<VarDecl *>(OrigVD),
853               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
854                   OrigVD) != nullptr,
855               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
856           return EmitLValue(&DRE).getAddress();
857         });
858         // Check if the variable is also a firstprivate: in this case IInit is
859         // not generated. Initialization of this variable will happen in codegen
860         // for 'firstprivate' clause.
861         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
862           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
863           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
864             // Emit private VarDecl with copy init.
865             EmitDecl(*VD);
866             return GetAddrOfLocalVar(VD);
867           });
868           assert(IsRegistered &&
869                  "lastprivate var already registered as private");
870           (void)IsRegistered;
871         }
872       }
873       ++IRef;
874       ++IDestRef;
875     }
876   }
877   return HasAtLeastOneLastprivate;
878 }
879 
880 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
881     const OMPExecutableDirective &D, bool NoFinals,
882     llvm::Value *IsLastIterCond) {
883   if (!HaveInsertPoint())
884     return;
885   // Emit following code:
886   // if (<IsLastIterCond>) {
887   //   orig_var1 = private_orig_var1;
888   //   ...
889   //   orig_varn = private_orig_varn;
890   // }
891   llvm::BasicBlock *ThenBB = nullptr;
892   llvm::BasicBlock *DoneBB = nullptr;
893   if (IsLastIterCond) {
894     ThenBB = createBasicBlock(".omp.lastprivate.then");
895     DoneBB = createBasicBlock(".omp.lastprivate.done");
896     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
897     EmitBlock(ThenBB);
898   }
899   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
900   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
901   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
902     auto IC = LoopDirective->counters().begin();
903     for (auto F : LoopDirective->finals()) {
904       auto *D =
905           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
906       if (NoFinals)
907         AlreadyEmittedVars.insert(D);
908       else
909         LoopCountersAndUpdates[D] = F;
910       ++IC;
911     }
912   }
913   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
914     auto IRef = C->varlist_begin();
915     auto ISrcRef = C->source_exprs().begin();
916     auto IDestRef = C->destination_exprs().begin();
917     for (auto *AssignOp : C->assignment_ops()) {
918       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
919       QualType Type = PrivateVD->getType();
920       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
921       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
922         // If lastprivate variable is a loop control variable for loop-based
923         // directive, update its value before copyin back to original
924         // variable.
925         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
926           EmitIgnoredExpr(FinalExpr);
927         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
928         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
929         // Get the address of the original variable.
930         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
931         // Get the address of the private variable.
932         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
933         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
934           PrivateAddr =
935               Address(Builder.CreateLoad(PrivateAddr),
936                       getNaturalTypeAlignment(RefTy->getPointeeType()));
937         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
938       }
939       ++IRef;
940       ++ISrcRef;
941       ++IDestRef;
942     }
943     if (auto *PostUpdate = C->getPostUpdateExpr())
944       EmitIgnoredExpr(PostUpdate);
945   }
946   if (IsLastIterCond)
947     EmitBlock(DoneBB, /*IsFinished=*/true);
948 }
949 
950 void CodeGenFunction::EmitOMPReductionClauseInit(
951     const OMPExecutableDirective &D,
952     CodeGenFunction::OMPPrivateScope &PrivateScope) {
953   if (!HaveInsertPoint())
954     return;
955   SmallVector<const Expr *, 4> Shareds;
956   SmallVector<const Expr *, 4> Privates;
957   SmallVector<const Expr *, 4> ReductionOps;
958   SmallVector<const Expr *, 4> LHSs;
959   SmallVector<const Expr *, 4> RHSs;
960   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
961     auto IPriv = C->privates().begin();
962     auto IRed = C->reduction_ops().begin();
963     auto ILHS = C->lhs_exprs().begin();
964     auto IRHS = C->rhs_exprs().begin();
965     for (const auto *Ref : C->varlists()) {
966       Shareds.emplace_back(Ref);
967       Privates.emplace_back(*IPriv);
968       ReductionOps.emplace_back(*IRed);
969       LHSs.emplace_back(*ILHS);
970       RHSs.emplace_back(*IRHS);
971       std::advance(IPriv, 1);
972       std::advance(IRed, 1);
973       std::advance(ILHS, 1);
974       std::advance(IRHS, 1);
975     }
976   }
977   ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
978   unsigned Count = 0;
979   auto ILHS = LHSs.begin();
980   auto IRHS = RHSs.begin();
981   auto IPriv = Privates.begin();
982   for (const auto *IRef : Shareds) {
983     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
984     // Emit private VarDecl with reduction init.
985     RedCG.emitSharedLValue(*this, Count);
986     RedCG.emitAggregateType(*this, Count);
987     auto Emission = EmitAutoVarAlloca(*PrivateVD);
988     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
989                              RedCG.getSharedLValue(Count),
990                              [&Emission](CodeGenFunction &CGF) {
991                                CGF.EmitAutoVarInit(Emission);
992                                return true;
993                              });
994     EmitAutoVarCleanups(Emission);
995     Address BaseAddr = RedCG.adjustPrivateAddress(
996         *this, Count, Emission.getAllocatedAddress());
997     bool IsRegistered = PrivateScope.addPrivate(
998         RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
999     assert(IsRegistered && "private var already registered as private");
1000     // Silence the warning about unused variable.
1001     (void)IsRegistered;
1002 
1003     auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1004     auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1005     QualType Type = PrivateVD->getType();
1006     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1007     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1008       // Store the address of the original variable associated with the LHS
1009       // implicit variable.
1010       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1011         return RedCG.getSharedLValue(Count).getAddress();
1012       });
1013       PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1014         return GetAddrOfLocalVar(PrivateVD);
1015       });
1016     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1017                isa<ArraySubscriptExpr>(IRef)) {
1018       // Store the address of the original variable associated with the LHS
1019       // implicit variable.
1020       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1021         return RedCG.getSharedLValue(Count).getAddress();
1022       });
1023       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1024         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1025                                             ConvertTypeForMem(RHSVD->getType()),
1026                                             "rhs.begin");
1027       });
1028     } else {
1029       QualType Type = PrivateVD->getType();
1030       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1031       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1032       // Store the address of the original variable associated with the LHS
1033       // implicit variable.
1034       if (IsArray) {
1035         OriginalAddr = Builder.CreateElementBitCast(
1036             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1037       }
1038       PrivateScope.addPrivate(
1039           LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
1040       PrivateScope.addPrivate(
1041           RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
1042             return IsArray
1043                        ? Builder.CreateElementBitCast(
1044                              GetAddrOfLocalVar(PrivateVD),
1045                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1046                        : GetAddrOfLocalVar(PrivateVD);
1047           });
1048     }
1049     ++ILHS;
1050     ++IRHS;
1051     ++IPriv;
1052     ++Count;
1053   }
1054 }
1055 
1056 void CodeGenFunction::EmitOMPReductionClauseFinal(
1057     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1058   if (!HaveInsertPoint())
1059     return;
1060   llvm::SmallVector<const Expr *, 8> Privates;
1061   llvm::SmallVector<const Expr *, 8> LHSExprs;
1062   llvm::SmallVector<const Expr *, 8> RHSExprs;
1063   llvm::SmallVector<const Expr *, 8> ReductionOps;
1064   bool HasAtLeastOneReduction = false;
1065   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1066     HasAtLeastOneReduction = true;
1067     Privates.append(C->privates().begin(), C->privates().end());
1068     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1069     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1070     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1071   }
1072   if (HasAtLeastOneReduction) {
1073     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1074                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1075                       D.getDirectiveKind() == OMPD_simd;
1076     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1077     // Emit nowait reduction if nowait clause is present or directive is a
1078     // parallel directive (it always has implicit barrier).
1079     CGM.getOpenMPRuntime().emitReduction(
1080         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1081         {WithNowait, SimpleReduction, ReductionKind});
1082   }
1083 }
1084 
1085 static void emitPostUpdateForReductionClause(
1086     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1087     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1088   if (!CGF.HaveInsertPoint())
1089     return;
1090   llvm::BasicBlock *DoneBB = nullptr;
1091   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1092     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1093       if (!DoneBB) {
1094         if (auto *Cond = CondGen(CGF)) {
1095           // If the first post-update expression is found, emit conditional
1096           // block if it was requested.
1097           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1098           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1099           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1100           CGF.EmitBlock(ThenBB);
1101         }
1102       }
1103       CGF.EmitIgnoredExpr(PostUpdate);
1104     }
1105   }
1106   if (DoneBB)
1107     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1108 }
1109 
1110 namespace {
1111 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1112 /// parallel function. This is necessary for combined constructs such as
1113 /// 'distribute parallel for'
1114 typedef llvm::function_ref<void(CodeGenFunction &,
1115                                 const OMPExecutableDirective &,
1116                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1117     CodeGenBoundParametersTy;
1118 } // anonymous namespace
1119 
1120 static void emitCommonOMPParallelDirective(
1121     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1122     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1123     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1124   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1125   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1126       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1127   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1128     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1129     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1130                                          /*IgnoreResultAssign*/ true);
1131     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1132         CGF, NumThreads, NumThreadsClause->getLocStart());
1133   }
1134   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1135     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1136     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1137         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1138   }
1139   const Expr *IfCond = nullptr;
1140   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1141     if (C->getNameModifier() == OMPD_unknown ||
1142         C->getNameModifier() == OMPD_parallel) {
1143       IfCond = C->getCondition();
1144       break;
1145     }
1146   }
1147 
1148   OMPParallelScope Scope(CGF, S);
1149   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1150   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1151   // lower and upper bounds with the pragma 'for' chunking mechanism.
1152   // The following lambda takes care of appending the lower and upper bound
1153   // parameters when necessary
1154   CodeGenBoundParameters(CGF, S, CapturedVars);
1155   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1156   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1157                                               CapturedVars, IfCond);
1158 }
1159 
1160 static void emitEmptyBoundParameters(CodeGenFunction &,
1161                                      const OMPExecutableDirective &,
1162                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1163 
1164 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1165   // Emit parallel region as a standalone region.
1166   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1167     OMPPrivateScope PrivateScope(CGF);
1168     bool Copyins = CGF.EmitOMPCopyinClause(S);
1169     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1170     if (Copyins) {
1171       // Emit implicit barrier to synchronize threads and avoid data races on
1172       // propagation master's thread values of threadprivate variables to local
1173       // instances of that variables of all other implicit threads.
1174       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1175           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1176           /*ForceSimpleCall=*/true);
1177     }
1178     CGF.EmitOMPPrivateClause(S, PrivateScope);
1179     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1180     (void)PrivateScope.Privatize();
1181     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1182     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1183   };
1184   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1185                                  emitEmptyBoundParameters);
1186   emitPostUpdateForReductionClause(
1187       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1188 }
1189 
1190 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1191                                       JumpDest LoopExit) {
1192   RunCleanupsScope BodyScope(*this);
1193   // Update counters values on current iteration.
1194   for (auto I : D.updates()) {
1195     EmitIgnoredExpr(I);
1196   }
1197   // Update the linear variables.
1198   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1199     for (auto *U : C->updates())
1200       EmitIgnoredExpr(U);
1201   }
1202 
1203   // On a continue in the body, jump to the end.
1204   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1205   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1206   // Emit loop body.
1207   EmitStmt(D.getBody());
1208   // The end (updates/cleanups).
1209   EmitBlock(Continue.getBlock());
1210   BreakContinueStack.pop_back();
1211 }
1212 
1213 void CodeGenFunction::EmitOMPInnerLoop(
1214     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1215     const Expr *IncExpr,
1216     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1217     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1218   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1219 
1220   // Start the loop with a block that tests the condition.
1221   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1222   EmitBlock(CondBlock);
1223   const SourceRange &R = S.getSourceRange();
1224   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1225                  SourceLocToDebugLoc(R.getEnd()));
1226 
1227   // If there are any cleanups between here and the loop-exit scope,
1228   // create a block to stage a loop exit along.
1229   auto ExitBlock = LoopExit.getBlock();
1230   if (RequiresCleanup)
1231     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1232 
1233   auto LoopBody = createBasicBlock("omp.inner.for.body");
1234 
1235   // Emit condition.
1236   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1237   if (ExitBlock != LoopExit.getBlock()) {
1238     EmitBlock(ExitBlock);
1239     EmitBranchThroughCleanup(LoopExit);
1240   }
1241 
1242   EmitBlock(LoopBody);
1243   incrementProfileCounter(&S);
1244 
1245   // Create a block for the increment.
1246   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1247   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1248 
1249   BodyGen(*this);
1250 
1251   // Emit "IV = IV + 1" and a back-edge to the condition block.
1252   EmitBlock(Continue.getBlock());
1253   EmitIgnoredExpr(IncExpr);
1254   PostIncGen(*this);
1255   BreakContinueStack.pop_back();
1256   EmitBranch(CondBlock);
1257   LoopStack.pop();
1258   // Emit the fall-through block.
1259   EmitBlock(LoopExit.getBlock());
1260 }
1261 
1262 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1263   if (!HaveInsertPoint())
1264     return false;
1265   // Emit inits for the linear variables.
1266   bool HasLinears = false;
1267   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1268     for (auto *Init : C->inits()) {
1269       HasLinears = true;
1270       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1271       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1272         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1273         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1274         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1275                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1276                         VD->getInit()->getType(), VK_LValue,
1277                         VD->getInit()->getExprLoc());
1278         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1279                                                 VD->getType()),
1280                        /*capturedByInit=*/false);
1281         EmitAutoVarCleanups(Emission);
1282       } else
1283         EmitVarDecl(*VD);
1284     }
1285     // Emit the linear steps for the linear clauses.
1286     // If a step is not constant, it is pre-calculated before the loop.
1287     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1288       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1289         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1290         // Emit calculation of the linear step.
1291         EmitIgnoredExpr(CS);
1292       }
1293   }
1294   return HasLinears;
1295 }
1296 
1297 void CodeGenFunction::EmitOMPLinearClauseFinal(
1298     const OMPLoopDirective &D,
1299     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1300   if (!HaveInsertPoint())
1301     return;
1302   llvm::BasicBlock *DoneBB = nullptr;
1303   // Emit the final values of the linear variables.
1304   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1305     auto IC = C->varlist_begin();
1306     for (auto *F : C->finals()) {
1307       if (!DoneBB) {
1308         if (auto *Cond = CondGen(*this)) {
1309           // If the first post-update expression is found, emit conditional
1310           // block if it was requested.
1311           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1312           DoneBB = createBasicBlock(".omp.linear.pu.done");
1313           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1314           EmitBlock(ThenBB);
1315         }
1316       }
1317       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1318       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1319                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1320                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1321       Address OrigAddr = EmitLValue(&DRE).getAddress();
1322       CodeGenFunction::OMPPrivateScope VarScope(*this);
1323       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1324       (void)VarScope.Privatize();
1325       EmitIgnoredExpr(F);
1326       ++IC;
1327     }
1328     if (auto *PostUpdate = C->getPostUpdateExpr())
1329       EmitIgnoredExpr(PostUpdate);
1330   }
1331   if (DoneBB)
1332     EmitBlock(DoneBB, /*IsFinished=*/true);
1333 }
1334 
1335 static void emitAlignedClause(CodeGenFunction &CGF,
1336                               const OMPExecutableDirective &D) {
1337   if (!CGF.HaveInsertPoint())
1338     return;
1339   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1340     unsigned ClauseAlignment = 0;
1341     if (auto AlignmentExpr = Clause->getAlignment()) {
1342       auto AlignmentCI =
1343           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1344       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1345     }
1346     for (auto E : Clause->varlists()) {
1347       unsigned Alignment = ClauseAlignment;
1348       if (Alignment == 0) {
1349         // OpenMP [2.8.1, Description]
1350         // If no optional parameter is specified, implementation-defined default
1351         // alignments for SIMD instructions on the target platforms are assumed.
1352         Alignment =
1353             CGF.getContext()
1354                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1355                     E->getType()->getPointeeType()))
1356                 .getQuantity();
1357       }
1358       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1359              "alignment is not power of 2");
1360       if (Alignment != 0) {
1361         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1362         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1363       }
1364     }
1365   }
1366 }
1367 
1368 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1369     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1370   if (!HaveInsertPoint())
1371     return;
1372   auto I = S.private_counters().begin();
1373   for (auto *E : S.counters()) {
1374     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1375     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1376     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1377       // Emit var without initialization.
1378       if (!LocalDeclMap.count(PrivateVD)) {
1379         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1380         EmitAutoVarCleanups(VarEmission);
1381       }
1382       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1383                       /*RefersToEnclosingVariableOrCapture=*/false,
1384                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1385       return EmitLValue(&DRE).getAddress();
1386     });
1387     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1388         VD->hasGlobalStorage()) {
1389       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1390         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1391                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1392                         E->getType(), VK_LValue, E->getExprLoc());
1393         return EmitLValue(&DRE).getAddress();
1394       });
1395     }
1396     ++I;
1397   }
1398 }
1399 
1400 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1401                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1402                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1403   if (!CGF.HaveInsertPoint())
1404     return;
1405   {
1406     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1407     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1408     (void)PreCondScope.Privatize();
1409     // Get initial values of real counters.
1410     for (auto I : S.inits()) {
1411       CGF.EmitIgnoredExpr(I);
1412     }
1413   }
1414   // Check that loop is executed at least one time.
1415   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1416 }
1417 
1418 void CodeGenFunction::EmitOMPLinearClause(
1419     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1420   if (!HaveInsertPoint())
1421     return;
1422   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1423   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1424     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1425     for (auto *C : LoopDirective->counters()) {
1426       SIMDLCVs.insert(
1427           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1428     }
1429   }
1430   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1431     auto CurPrivate = C->privates().begin();
1432     for (auto *E : C->varlists()) {
1433       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1434       auto *PrivateVD =
1435           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1436       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1437         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1438           // Emit private VarDecl with copy init.
1439           EmitVarDecl(*PrivateVD);
1440           return GetAddrOfLocalVar(PrivateVD);
1441         });
1442         assert(IsRegistered && "linear var already registered as private");
1443         // Silence the warning about unused variable.
1444         (void)IsRegistered;
1445       } else
1446         EmitVarDecl(*PrivateVD);
1447       ++CurPrivate;
1448     }
1449   }
1450 }
1451 
1452 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1453                                      const OMPExecutableDirective &D,
1454                                      bool IsMonotonic) {
1455   if (!CGF.HaveInsertPoint())
1456     return;
1457   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1458     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1459                                  /*ignoreResult=*/true);
1460     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1461     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1462     // In presence of finite 'safelen', it may be unsafe to mark all
1463     // the memory instructions parallel, because loop-carried
1464     // dependences of 'safelen' iterations are possible.
1465     if (!IsMonotonic)
1466       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1467   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1468     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1469                                  /*ignoreResult=*/true);
1470     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1471     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1472     // In presence of finite 'safelen', it may be unsafe to mark all
1473     // the memory instructions parallel, because loop-carried
1474     // dependences of 'safelen' iterations are possible.
1475     CGF.LoopStack.setParallel(false);
1476   }
1477 }
1478 
1479 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1480                                       bool IsMonotonic) {
1481   // Walk clauses and process safelen/lastprivate.
1482   LoopStack.setParallel(!IsMonotonic);
1483   LoopStack.setVectorizeEnable(true);
1484   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1485 }
1486 
1487 void CodeGenFunction::EmitOMPSimdFinal(
1488     const OMPLoopDirective &D,
1489     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1490   if (!HaveInsertPoint())
1491     return;
1492   llvm::BasicBlock *DoneBB = nullptr;
1493   auto IC = D.counters().begin();
1494   auto IPC = D.private_counters().begin();
1495   for (auto F : D.finals()) {
1496     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1497     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1498     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1499     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1500         OrigVD->hasGlobalStorage() || CED) {
1501       if (!DoneBB) {
1502         if (auto *Cond = CondGen(*this)) {
1503           // If the first post-update expression is found, emit conditional
1504           // block if it was requested.
1505           auto *ThenBB = createBasicBlock(".omp.final.then");
1506           DoneBB = createBasicBlock(".omp.final.done");
1507           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1508           EmitBlock(ThenBB);
1509         }
1510       }
1511       Address OrigAddr = Address::invalid();
1512       if (CED)
1513         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1514       else {
1515         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1516                         /*RefersToEnclosingVariableOrCapture=*/false,
1517                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1518         OrigAddr = EmitLValue(&DRE).getAddress();
1519       }
1520       OMPPrivateScope VarScope(*this);
1521       VarScope.addPrivate(OrigVD,
1522                           [OrigAddr]() -> Address { return OrigAddr; });
1523       (void)VarScope.Privatize();
1524       EmitIgnoredExpr(F);
1525     }
1526     ++IC;
1527     ++IPC;
1528   }
1529   if (DoneBB)
1530     EmitBlock(DoneBB, /*IsFinished=*/true);
1531 }
1532 
1533 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1534                                          const OMPLoopDirective &S,
1535                                          CodeGenFunction::JumpDest LoopExit) {
1536   CGF.EmitOMPLoopBody(S, LoopExit);
1537   CGF.EmitStopPoint(&S);
1538 }
1539 
1540 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1541   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1542     OMPLoopScope PreInitScope(CGF, S);
1543     // if (PreCond) {
1544     //   for (IV in 0..LastIteration) BODY;
1545     //   <Final counter/linear vars updates>;
1546     // }
1547     //
1548 
1549     // Emit: if (PreCond) - begin.
1550     // If the condition constant folds and can be elided, avoid emitting the
1551     // whole loop.
1552     bool CondConstant;
1553     llvm::BasicBlock *ContBlock = nullptr;
1554     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1555       if (!CondConstant)
1556         return;
1557     } else {
1558       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1559       ContBlock = CGF.createBasicBlock("simd.if.end");
1560       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1561                   CGF.getProfileCount(&S));
1562       CGF.EmitBlock(ThenBlock);
1563       CGF.incrementProfileCounter(&S);
1564     }
1565 
1566     // Emit the loop iteration variable.
1567     const Expr *IVExpr = S.getIterationVariable();
1568     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1569     CGF.EmitVarDecl(*IVDecl);
1570     CGF.EmitIgnoredExpr(S.getInit());
1571 
1572     // Emit the iterations count variable.
1573     // If it is not a variable, Sema decided to calculate iterations count on
1574     // each iteration (e.g., it is foldable into a constant).
1575     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1576       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1577       // Emit calculation of the iterations count.
1578       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1579     }
1580 
1581     CGF.EmitOMPSimdInit(S);
1582 
1583     emitAlignedClause(CGF, S);
1584     (void)CGF.EmitOMPLinearClauseInit(S);
1585     {
1586       OMPPrivateScope LoopScope(CGF);
1587       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1588       CGF.EmitOMPLinearClause(S, LoopScope);
1589       CGF.EmitOMPPrivateClause(S, LoopScope);
1590       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1591       bool HasLastprivateClause =
1592           CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1593       (void)LoopScope.Privatize();
1594       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1595                            S.getInc(),
1596                            [&S](CodeGenFunction &CGF) {
1597                              CGF.EmitOMPLoopBody(S, JumpDest());
1598                              CGF.EmitStopPoint(&S);
1599                            },
1600                            [](CodeGenFunction &) {});
1601       CGF.EmitOMPSimdFinal(
1602           S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1603       // Emit final copy of the lastprivate variables at the end of loops.
1604       if (HasLastprivateClause)
1605         CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1606       CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1607       emitPostUpdateForReductionClause(
1608           CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1609     }
1610     CGF.EmitOMPLinearClauseFinal(
1611         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1612     // Emit: if (PreCond) - end.
1613     if (ContBlock) {
1614       CGF.EmitBranch(ContBlock);
1615       CGF.EmitBlock(ContBlock, true);
1616     }
1617   };
1618   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1619   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1620 }
1621 
1622 void CodeGenFunction::EmitOMPOuterLoop(
1623     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1624     CodeGenFunction::OMPPrivateScope &LoopScope,
1625     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1626     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1627     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1628   auto &RT = CGM.getOpenMPRuntime();
1629 
1630   const Expr *IVExpr = S.getIterationVariable();
1631   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1632   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1633 
1634   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1635 
1636   // Start the loop with a block that tests the condition.
1637   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1638   EmitBlock(CondBlock);
1639   const SourceRange &R = S.getSourceRange();
1640   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1641                  SourceLocToDebugLoc(R.getEnd()));
1642 
1643   llvm::Value *BoolCondVal = nullptr;
1644   if (!DynamicOrOrdered) {
1645     // UB = min(UB, GlobalUB) or
1646     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1647     // 'distribute parallel for')
1648     EmitIgnoredExpr(LoopArgs.EUB);
1649     // IV = LB
1650     EmitIgnoredExpr(LoopArgs.Init);
1651     // IV < UB
1652     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1653   } else {
1654     BoolCondVal =
1655         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1656                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1657   }
1658 
1659   // If there are any cleanups between here and the loop-exit scope,
1660   // create a block to stage a loop exit along.
1661   auto ExitBlock = LoopExit.getBlock();
1662   if (LoopScope.requiresCleanups())
1663     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1664 
1665   auto LoopBody = createBasicBlock("omp.dispatch.body");
1666   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1667   if (ExitBlock != LoopExit.getBlock()) {
1668     EmitBlock(ExitBlock);
1669     EmitBranchThroughCleanup(LoopExit);
1670   }
1671   EmitBlock(LoopBody);
1672 
1673   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1674   // LB for loop condition and emitted it above).
1675   if (DynamicOrOrdered)
1676     EmitIgnoredExpr(LoopArgs.Init);
1677 
1678   // Create a block for the increment.
1679   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1680   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1681 
1682   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1683   // with dynamic/guided scheduling and without ordered clause.
1684   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1685     LoopStack.setParallel(!IsMonotonic);
1686   else
1687     EmitOMPSimdInit(S, IsMonotonic);
1688 
1689   SourceLocation Loc = S.getLocStart();
1690 
1691   // when 'distribute' is not combined with a 'for':
1692   // while (idx <= UB) { BODY; ++idx; }
1693   // when 'distribute' is combined with a 'for'
1694   // (e.g. 'distribute parallel for')
1695   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1696   EmitOMPInnerLoop(
1697       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1698       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1699         CodeGenLoop(CGF, S, LoopExit);
1700       },
1701       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1702         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1703       });
1704 
1705   EmitBlock(Continue.getBlock());
1706   BreakContinueStack.pop_back();
1707   if (!DynamicOrOrdered) {
1708     // Emit "LB = LB + Stride", "UB = UB + Stride".
1709     EmitIgnoredExpr(LoopArgs.NextLB);
1710     EmitIgnoredExpr(LoopArgs.NextUB);
1711   }
1712 
1713   EmitBranch(CondBlock);
1714   LoopStack.pop();
1715   // Emit the fall-through block.
1716   EmitBlock(LoopExit.getBlock());
1717 
1718   // Tell the runtime we are done.
1719   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1720     if (!DynamicOrOrdered)
1721       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1722                                                      S.getDirectiveKind());
1723   };
1724   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1725 }
1726 
1727 void CodeGenFunction::EmitOMPForOuterLoop(
1728     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1729     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1730     const OMPLoopArguments &LoopArgs,
1731     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1732   auto &RT = CGM.getOpenMPRuntime();
1733 
1734   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1735   const bool DynamicOrOrdered =
1736       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1737 
1738   assert((Ordered ||
1739           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1740                                  LoopArgs.Chunk != nullptr)) &&
1741          "static non-chunked schedule does not need outer loop");
1742 
1743   // Emit outer loop.
1744   //
1745   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1746   // When schedule(dynamic,chunk_size) is specified, the iterations are
1747   // distributed to threads in the team in chunks as the threads request them.
1748   // Each thread executes a chunk of iterations, then requests another chunk,
1749   // until no chunks remain to be distributed. Each chunk contains chunk_size
1750   // iterations, except for the last chunk to be distributed, which may have
1751   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1752   //
1753   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1754   // to threads in the team in chunks as the executing threads request them.
1755   // Each thread executes a chunk of iterations, then requests another chunk,
1756   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1757   // each chunk is proportional to the number of unassigned iterations divided
1758   // by the number of threads in the team, decreasing to 1. For a chunk_size
1759   // with value k (greater than 1), the size of each chunk is determined in the
1760   // same way, with the restriction that the chunks do not contain fewer than k
1761   // iterations (except for the last chunk to be assigned, which may have fewer
1762   // than k iterations).
1763   //
1764   // When schedule(auto) is specified, the decision regarding scheduling is
1765   // delegated to the compiler and/or runtime system. The programmer gives the
1766   // implementation the freedom to choose any possible mapping of iterations to
1767   // threads in the team.
1768   //
1769   // When schedule(runtime) is specified, the decision regarding scheduling is
1770   // deferred until run time, and the schedule and chunk size are taken from the
1771   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1772   // implementation defined
1773   //
1774   // while(__kmpc_dispatch_next(&LB, &UB)) {
1775   //   idx = LB;
1776   //   while (idx <= UB) { BODY; ++idx;
1777   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1778   //   } // inner loop
1779   // }
1780   //
1781   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1782   // When schedule(static, chunk_size) is specified, iterations are divided into
1783   // chunks of size chunk_size, and the chunks are assigned to the threads in
1784   // the team in a round-robin fashion in the order of the thread number.
1785   //
1786   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1787   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1788   //   LB = LB + ST;
1789   //   UB = UB + ST;
1790   // }
1791   //
1792 
1793   const Expr *IVExpr = S.getIterationVariable();
1794   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1795   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1796 
1797   if (DynamicOrOrdered) {
1798     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1799     llvm::Value *LBVal = DispatchBounds.first;
1800     llvm::Value *UBVal = DispatchBounds.second;
1801     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1802                                                              LoopArgs.Chunk};
1803     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1804                            IVSigned, Ordered, DipatchRTInputValues);
1805   } else {
1806     CGOpenMPRuntime::StaticRTInput StaticInit(
1807         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1808         LoopArgs.ST, LoopArgs.Chunk);
1809     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1810                          ScheduleKind, StaticInit);
1811   }
1812 
1813   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1814                                     const unsigned IVSize,
1815                                     const bool IVSigned) {
1816     if (Ordered) {
1817       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1818                                                             IVSigned);
1819     }
1820   };
1821 
1822   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1823                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1824   OuterLoopArgs.IncExpr = S.getInc();
1825   OuterLoopArgs.Init = S.getInit();
1826   OuterLoopArgs.Cond = S.getCond();
1827   OuterLoopArgs.NextLB = S.getNextLowerBound();
1828   OuterLoopArgs.NextUB = S.getNextUpperBound();
1829   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1830                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1831 }
1832 
1833 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1834                              const unsigned IVSize, const bool IVSigned) {}
1835 
1836 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1837     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1838     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1839     const CodeGenLoopTy &CodeGenLoopContent) {
1840 
1841   auto &RT = CGM.getOpenMPRuntime();
1842 
1843   // Emit outer loop.
1844   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1845   // dynamic
1846   //
1847 
1848   const Expr *IVExpr = S.getIterationVariable();
1849   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1850   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1851 
1852   CGOpenMPRuntime::StaticRTInput StaticInit(
1853       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1854       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1855   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1856 
1857   // for combined 'distribute' and 'for' the increment expression of distribute
1858   // is store in DistInc. For 'distribute' alone, it is in Inc.
1859   Expr *IncExpr;
1860   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1861     IncExpr = S.getDistInc();
1862   else
1863     IncExpr = S.getInc();
1864 
1865   // this routine is shared by 'omp distribute parallel for' and
1866   // 'omp distribute': select the right EUB expression depending on the
1867   // directive
1868   OMPLoopArguments OuterLoopArgs;
1869   OuterLoopArgs.LB = LoopArgs.LB;
1870   OuterLoopArgs.UB = LoopArgs.UB;
1871   OuterLoopArgs.ST = LoopArgs.ST;
1872   OuterLoopArgs.IL = LoopArgs.IL;
1873   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1874   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1875                           ? S.getCombinedEnsureUpperBound()
1876                           : S.getEnsureUpperBound();
1877   OuterLoopArgs.IncExpr = IncExpr;
1878   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1879                            ? S.getCombinedInit()
1880                            : S.getInit();
1881   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1882                            ? S.getCombinedCond()
1883                            : S.getCond();
1884   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1885                              ? S.getCombinedNextLowerBound()
1886                              : S.getNextLowerBound();
1887   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1888                              ? S.getCombinedNextUpperBound()
1889                              : S.getNextUpperBound();
1890 
1891   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1892                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1893                    emitEmptyOrdered);
1894 }
1895 
1896 /// Emit a helper variable and return corresponding lvalue.
1897 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1898                                const DeclRefExpr *Helper) {
1899   auto VDecl = cast<VarDecl>(Helper->getDecl());
1900   CGF.EmitVarDecl(*VDecl);
1901   return CGF.EmitLValue(Helper);
1902 }
1903 
1904 static std::pair<LValue, LValue>
1905 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1906                                      const OMPExecutableDirective &S) {
1907   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1908   LValue LB =
1909       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1910   LValue UB =
1911       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1912 
1913   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1914   // parallel for') we need to use the 'distribute'
1915   // chunk lower and upper bounds rather than the whole loop iteration
1916   // space. These are parameters to the outlined function for 'parallel'
1917   // and we copy the bounds of the previous schedule into the
1918   // the current ones.
1919   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1920   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1921   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1922   PrevLBVal = CGF.EmitScalarConversion(
1923       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1924       LS.getIterationVariable()->getType(), SourceLocation());
1925   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1926   PrevUBVal = CGF.EmitScalarConversion(
1927       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1928       LS.getIterationVariable()->getType(), SourceLocation());
1929 
1930   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1931   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1932 
1933   return {LB, UB};
1934 }
1935 
1936 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1937 /// we need to use the LB and UB expressions generated by the worksharing
1938 /// code generation support, whereas in non combined situations we would
1939 /// just emit 0 and the LastIteration expression
1940 /// This function is necessary due to the difference of the LB and UB
1941 /// types for the RT emission routines for 'for_static_init' and
1942 /// 'for_dispatch_init'
1943 static std::pair<llvm::Value *, llvm::Value *>
1944 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1945                                         const OMPExecutableDirective &S,
1946                                         Address LB, Address UB) {
1947   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1948   const Expr *IVExpr = LS.getIterationVariable();
1949   // when implementing a dynamic schedule for a 'for' combined with a
1950   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1951   // is not normalized as each team only executes its own assigned
1952   // distribute chunk
1953   QualType IteratorTy = IVExpr->getType();
1954   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1955                                             SourceLocation());
1956   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1957                                             SourceLocation());
1958   return {LBVal, UBVal};
1959 }
1960 
1961 static void emitDistributeParallelForDistributeInnerBoundParams(
1962     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1963     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1964   const auto &Dir = cast<OMPLoopDirective>(S);
1965   LValue LB =
1966       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1967   auto LBCast = CGF.Builder.CreateIntCast(
1968       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1969   CapturedVars.push_back(LBCast);
1970   LValue UB =
1971       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1972 
1973   auto UBCast = CGF.Builder.CreateIntCast(
1974       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1975   CapturedVars.push_back(UBCast);
1976 }
1977 
1978 static void
1979 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
1980                                  const OMPLoopDirective &S,
1981                                  CodeGenFunction::JumpDest LoopExit) {
1982   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
1983                                          PrePostActionTy &) {
1984     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
1985                                emitDistributeParallelForInnerBounds,
1986                                emitDistributeParallelForDispatchBounds);
1987   };
1988 
1989   emitCommonOMPParallelDirective(
1990       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
1991       emitDistributeParallelForDistributeInnerBoundParams);
1992 }
1993 
1994 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1995     const OMPDistributeParallelForDirective &S) {
1996   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1997     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
1998                               S.getDistInc());
1999   };
2000   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2001   OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
2002                                   /*HasCancel=*/false);
2003   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2004                                               /*HasCancel=*/false);
2005 }
2006 
2007 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2008     const OMPDistributeParallelForSimdDirective &S) {
2009   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2010   CGM.getOpenMPRuntime().emitInlinedDirective(
2011       *this, OMPD_distribute_parallel_for_simd,
2012       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2013         OMPLoopScope PreInitScope(CGF, S);
2014         CGF.EmitStmt(
2015             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2016       });
2017 }
2018 
2019 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2020     const OMPDistributeSimdDirective &S) {
2021   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2022   CGM.getOpenMPRuntime().emitInlinedDirective(
2023       *this, OMPD_distribute_simd,
2024       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2025         OMPLoopScope PreInitScope(CGF, S);
2026         CGF.EmitStmt(
2027             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2028       });
2029 }
2030 
2031 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
2032     const OMPTargetParallelForSimdDirective &S) {
2033   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2034   CGM.getOpenMPRuntime().emitInlinedDirective(
2035       *this, OMPD_target_parallel_for_simd,
2036       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2037         OMPLoopScope PreInitScope(CGF, S);
2038         CGF.EmitStmt(
2039             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2040       });
2041 }
2042 
2043 void CodeGenFunction::EmitOMPTargetSimdDirective(
2044     const OMPTargetSimdDirective &S) {
2045   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2046   CGM.getOpenMPRuntime().emitInlinedDirective(
2047       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2048         OMPLoopScope PreInitScope(CGF, S);
2049         CGF.EmitStmt(
2050             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2051       });
2052 }
2053 
2054 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2055     const OMPTeamsDistributeSimdDirective &S) {
2056   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2057   CGM.getOpenMPRuntime().emitInlinedDirective(
2058       *this, OMPD_teams_distribute_simd,
2059       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2060         OMPLoopScope PreInitScope(CGF, S);
2061         CGF.EmitStmt(
2062             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2063       });
2064 }
2065 
2066 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2067     const OMPTeamsDistributeParallelForSimdDirective &S) {
2068   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2069   CGM.getOpenMPRuntime().emitInlinedDirective(
2070       *this, OMPD_teams_distribute_parallel_for_simd,
2071       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2072         OMPLoopScope PreInitScope(CGF, S);
2073         CGF.EmitStmt(
2074             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2075       });
2076 }
2077 
2078 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2079     const OMPTeamsDistributeParallelForDirective &S) {
2080   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2081   CGM.getOpenMPRuntime().emitInlinedDirective(
2082       *this, OMPD_teams_distribute_parallel_for,
2083       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2084         OMPLoopScope PreInitScope(CGF, S);
2085         CGF.EmitStmt(
2086             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2087       });
2088 }
2089 
2090 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2091     const OMPTargetTeamsDistributeDirective &S) {
2092   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2093   CGM.getOpenMPRuntime().emitInlinedDirective(
2094       *this, OMPD_target_teams_distribute,
2095       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2096         CGF.EmitStmt(
2097             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2098       });
2099 }
2100 
2101 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2102     const OMPTargetTeamsDistributeParallelForDirective &S) {
2103   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2104   CGM.getOpenMPRuntime().emitInlinedDirective(
2105       *this, OMPD_target_teams_distribute_parallel_for,
2106       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2107         CGF.EmitStmt(
2108             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2109       });
2110 }
2111 
2112 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2113     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2114   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2115   CGM.getOpenMPRuntime().emitInlinedDirective(
2116       *this, OMPD_target_teams_distribute_parallel_for_simd,
2117       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2118         CGF.EmitStmt(
2119             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2120       });
2121 }
2122 
2123 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2124     const OMPTargetTeamsDistributeSimdDirective &S) {
2125   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2126   CGM.getOpenMPRuntime().emitInlinedDirective(
2127       *this, OMPD_target_teams_distribute_simd,
2128       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2129         CGF.EmitStmt(
2130             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2131       });
2132 }
2133 
2134 namespace {
2135   struct ScheduleKindModifiersTy {
2136     OpenMPScheduleClauseKind Kind;
2137     OpenMPScheduleClauseModifier M1;
2138     OpenMPScheduleClauseModifier M2;
2139     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2140                             OpenMPScheduleClauseModifier M1,
2141                             OpenMPScheduleClauseModifier M2)
2142         : Kind(Kind), M1(M1), M2(M2) {}
2143   };
2144 } // namespace
2145 
2146 bool CodeGenFunction::EmitOMPWorksharingLoop(
2147     const OMPLoopDirective &S, Expr *EUB,
2148     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2149     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2150   // Emit the loop iteration variable.
2151   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2152   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2153   EmitVarDecl(*IVDecl);
2154 
2155   // Emit the iterations count variable.
2156   // If it is not a variable, Sema decided to calculate iterations count on each
2157   // iteration (e.g., it is foldable into a constant).
2158   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2159     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2160     // Emit calculation of the iterations count.
2161     EmitIgnoredExpr(S.getCalcLastIteration());
2162   }
2163 
2164   auto &RT = CGM.getOpenMPRuntime();
2165 
2166   bool HasLastprivateClause;
2167   // Check pre-condition.
2168   {
2169     OMPLoopScope PreInitScope(*this, S);
2170     // Skip the entire loop if we don't meet the precondition.
2171     // If the condition constant folds and can be elided, avoid emitting the
2172     // whole loop.
2173     bool CondConstant;
2174     llvm::BasicBlock *ContBlock = nullptr;
2175     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2176       if (!CondConstant)
2177         return false;
2178     } else {
2179       auto *ThenBlock = createBasicBlock("omp.precond.then");
2180       ContBlock = createBasicBlock("omp.precond.end");
2181       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2182                   getProfileCount(&S));
2183       EmitBlock(ThenBlock);
2184       incrementProfileCounter(&S);
2185     }
2186 
2187     bool Ordered = false;
2188     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2189       if (OrderedClause->getNumForLoops())
2190         RT.emitDoacrossInit(*this, S);
2191       else
2192         Ordered = true;
2193     }
2194 
2195     llvm::DenseSet<const Expr *> EmittedFinals;
2196     emitAlignedClause(*this, S);
2197     bool HasLinears = EmitOMPLinearClauseInit(S);
2198     // Emit helper vars inits.
2199 
2200     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2201     LValue LB = Bounds.first;
2202     LValue UB = Bounds.second;
2203     LValue ST =
2204         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2205     LValue IL =
2206         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2207 
2208     // Emit 'then' code.
2209     {
2210       OMPPrivateScope LoopScope(*this);
2211       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2212         // Emit implicit barrier to synchronize threads and avoid data races on
2213         // initialization of firstprivate variables and post-update of
2214         // lastprivate variables.
2215         CGM.getOpenMPRuntime().emitBarrierCall(
2216             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2217             /*ForceSimpleCall=*/true);
2218       }
2219       EmitOMPPrivateClause(S, LoopScope);
2220       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2221       EmitOMPReductionClauseInit(S, LoopScope);
2222       EmitOMPPrivateLoopCounters(S, LoopScope);
2223       EmitOMPLinearClause(S, LoopScope);
2224       (void)LoopScope.Privatize();
2225 
2226       // Detect the loop schedule kind and chunk.
2227       llvm::Value *Chunk = nullptr;
2228       OpenMPScheduleTy ScheduleKind;
2229       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2230         ScheduleKind.Schedule = C->getScheduleKind();
2231         ScheduleKind.M1 = C->getFirstScheduleModifier();
2232         ScheduleKind.M2 = C->getSecondScheduleModifier();
2233         if (const auto *Ch = C->getChunkSize()) {
2234           Chunk = EmitScalarExpr(Ch);
2235           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2236                                        S.getIterationVariable()->getType(),
2237                                        S.getLocStart());
2238         }
2239       }
2240       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2241       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2242       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2243       // If the static schedule kind is specified or if the ordered clause is
2244       // specified, and if no monotonic modifier is specified, the effect will
2245       // be as if the monotonic modifier was specified.
2246       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2247                                 /* Chunked */ Chunk != nullptr) &&
2248           !Ordered) {
2249         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2250           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2251         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2252         // When no chunk_size is specified, the iteration space is divided into
2253         // chunks that are approximately equal in size, and at most one chunk is
2254         // distributed to each thread. Note that the size of the chunks is
2255         // unspecified in this case.
2256         CGOpenMPRuntime::StaticRTInput StaticInit(
2257             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2258             UB.getAddress(), ST.getAddress());
2259         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2260                              ScheduleKind, StaticInit);
2261         auto LoopExit =
2262             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2263         // UB = min(UB, GlobalUB);
2264         EmitIgnoredExpr(S.getEnsureUpperBound());
2265         // IV = LB;
2266         EmitIgnoredExpr(S.getInit());
2267         // while (idx <= UB) { BODY; ++idx; }
2268         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2269                          S.getInc(),
2270                          [&S, LoopExit](CodeGenFunction &CGF) {
2271                            CGF.EmitOMPLoopBody(S, LoopExit);
2272                            CGF.EmitStopPoint(&S);
2273                          },
2274                          [](CodeGenFunction &) {});
2275         EmitBlock(LoopExit.getBlock());
2276         // Tell the runtime we are done.
2277         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2278           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2279                                                          S.getDirectiveKind());
2280         };
2281         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2282       } else {
2283         const bool IsMonotonic =
2284             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2285             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2286             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2287             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2288         // Emit the outer loop, which requests its work chunk [LB..UB] from
2289         // runtime and runs the inner loop to process it.
2290         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2291                                              ST.getAddress(), IL.getAddress(),
2292                                              Chunk, EUB);
2293         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2294                             LoopArguments, CGDispatchBounds);
2295       }
2296       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2297         EmitOMPSimdFinal(S,
2298                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2299                            return CGF.Builder.CreateIsNotNull(
2300                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2301                          });
2302       }
2303       EmitOMPReductionClauseFinal(
2304           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2305                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2306                  : /*Parallel only*/ OMPD_parallel);
2307       // Emit post-update of the reduction variables if IsLastIter != 0.
2308       emitPostUpdateForReductionClause(
2309           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2310             return CGF.Builder.CreateIsNotNull(
2311                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2312           });
2313       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2314       if (HasLastprivateClause)
2315         EmitOMPLastprivateClauseFinal(
2316             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2317             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2318     }
2319     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2320       return CGF.Builder.CreateIsNotNull(
2321           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2322     });
2323     // We're now done with the loop, so jump to the continuation block.
2324     if (ContBlock) {
2325       EmitBranch(ContBlock);
2326       EmitBlock(ContBlock, true);
2327     }
2328   }
2329   return HasLastprivateClause;
2330 }
2331 
2332 /// The following two functions generate expressions for the loop lower
2333 /// and upper bounds in case of static and dynamic (dispatch) schedule
2334 /// of the associated 'for' or 'distribute' loop.
2335 static std::pair<LValue, LValue>
2336 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2337   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2338   LValue LB =
2339       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2340   LValue UB =
2341       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2342   return {LB, UB};
2343 }
2344 
2345 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2346 /// consider the lower and upper bound expressions generated by the
2347 /// worksharing loop support, but we use 0 and the iteration space size as
2348 /// constants
2349 static std::pair<llvm::Value *, llvm::Value *>
2350 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2351                           Address LB, Address UB) {
2352   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2353   const Expr *IVExpr = LS.getIterationVariable();
2354   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2355   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2356   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2357   return {LBVal, UBVal};
2358 }
2359 
2360 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2361   bool HasLastprivates = false;
2362   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2363                                           PrePostActionTy &) {
2364     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2365     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2366                                                  emitForLoopBounds,
2367                                                  emitDispatchForLoopBounds);
2368   };
2369   {
2370     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2371     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2372                                                 S.hasCancel());
2373   }
2374 
2375   // Emit an implicit barrier at the end.
2376   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2377     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2378   }
2379 }
2380 
2381 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2382   bool HasLastprivates = false;
2383   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2384                                           PrePostActionTy &) {
2385     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2386                                                  emitForLoopBounds,
2387                                                  emitDispatchForLoopBounds);
2388   };
2389   {
2390     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2391     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2392   }
2393 
2394   // Emit an implicit barrier at the end.
2395   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2396     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2397   }
2398 }
2399 
2400 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2401                                 const Twine &Name,
2402                                 llvm::Value *Init = nullptr) {
2403   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2404   if (Init)
2405     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2406   return LVal;
2407 }
2408 
2409 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2410   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
2411   auto *CS = dyn_cast<CompoundStmt>(Stmt);
2412   bool HasLastprivates = false;
2413   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
2414                                                     PrePostActionTy &) {
2415     auto &C = CGF.CGM.getContext();
2416     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2417     // Emit helper vars inits.
2418     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2419                                   CGF.Builder.getInt32(0));
2420     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2421                                       : CGF.Builder.getInt32(0);
2422     LValue UB =
2423         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2424     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2425                                   CGF.Builder.getInt32(1));
2426     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2427                                   CGF.Builder.getInt32(0));
2428     // Loop counter.
2429     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2430     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2431     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2432     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2433     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2434     // Generate condition for loop.
2435     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2436                         OK_Ordinary, S.getLocStart(), FPOptions());
2437     // Increment for loop counter.
2438     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2439                       S.getLocStart());
2440     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2441       // Iterate through all sections and emit a switch construct:
2442       // switch (IV) {
2443       //   case 0:
2444       //     <SectionStmt[0]>;
2445       //     break;
2446       // ...
2447       //   case <NumSection> - 1:
2448       //     <SectionStmt[<NumSection> - 1]>;
2449       //     break;
2450       // }
2451       // .omp.sections.exit:
2452       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2453       auto *SwitchStmt = CGF.Builder.CreateSwitch(
2454           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2455           CS == nullptr ? 1 : CS->size());
2456       if (CS) {
2457         unsigned CaseNumber = 0;
2458         for (auto *SubStmt : CS->children()) {
2459           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2460           CGF.EmitBlock(CaseBB);
2461           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2462           CGF.EmitStmt(SubStmt);
2463           CGF.EmitBranch(ExitBB);
2464           ++CaseNumber;
2465         }
2466       } else {
2467         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2468         CGF.EmitBlock(CaseBB);
2469         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2470         CGF.EmitStmt(Stmt);
2471         CGF.EmitBranch(ExitBB);
2472       }
2473       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2474     };
2475 
2476     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2477     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2478       // Emit implicit barrier to synchronize threads and avoid data races on
2479       // initialization of firstprivate variables and post-update of lastprivate
2480       // variables.
2481       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2482           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2483           /*ForceSimpleCall=*/true);
2484     }
2485     CGF.EmitOMPPrivateClause(S, LoopScope);
2486     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2487     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2488     (void)LoopScope.Privatize();
2489 
2490     // Emit static non-chunked loop.
2491     OpenMPScheduleTy ScheduleKind;
2492     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2493     CGOpenMPRuntime::StaticRTInput StaticInit(
2494         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
2495         LB.getAddress(), UB.getAddress(), ST.getAddress());
2496     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2497         CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
2498     // UB = min(UB, GlobalUB);
2499     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2500     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2501         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2502     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2503     // IV = LB;
2504     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2505     // while (idx <= UB) { BODY; ++idx; }
2506     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2507                          [](CodeGenFunction &) {});
2508     // Tell the runtime we are done.
2509     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2510       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2511                                                      S.getDirectiveKind());
2512     };
2513     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2514     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2515     // Emit post-update of the reduction variables if IsLastIter != 0.
2516     emitPostUpdateForReductionClause(
2517         CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2518           return CGF.Builder.CreateIsNotNull(
2519               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2520         });
2521 
2522     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2523     if (HasLastprivates)
2524       CGF.EmitOMPLastprivateClauseFinal(
2525           S, /*NoFinals=*/false,
2526           CGF.Builder.CreateIsNotNull(
2527               CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2528   };
2529 
2530   bool HasCancel = false;
2531   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2532     HasCancel = OSD->hasCancel();
2533   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2534     HasCancel = OPSD->hasCancel();
2535   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2536   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2537                                               HasCancel);
2538   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2539   // clause. Otherwise the barrier will be generated by the codegen for the
2540   // directive.
2541   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2542     // Emit implicit barrier to synchronize threads and avoid data races on
2543     // initialization of firstprivate variables.
2544     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2545                                            OMPD_unknown);
2546   }
2547 }
2548 
2549 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2550   {
2551     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2552     EmitSections(S);
2553   }
2554   // Emit an implicit barrier at the end.
2555   if (!S.getSingleClause<OMPNowaitClause>()) {
2556     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2557                                            OMPD_sections);
2558   }
2559 }
2560 
2561 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2562   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2563     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2564   };
2565   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2566   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2567                                               S.hasCancel());
2568 }
2569 
2570 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2571   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2572   llvm::SmallVector<const Expr *, 8> DestExprs;
2573   llvm::SmallVector<const Expr *, 8> SrcExprs;
2574   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2575   // Check if there are any 'copyprivate' clauses associated with this
2576   // 'single' construct.
2577   // Build a list of copyprivate variables along with helper expressions
2578   // (<source>, <destination>, <destination>=<source> expressions)
2579   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2580     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2581     DestExprs.append(C->destination_exprs().begin(),
2582                      C->destination_exprs().end());
2583     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2584     AssignmentOps.append(C->assignment_ops().begin(),
2585                          C->assignment_ops().end());
2586   }
2587   // Emit code for 'single' region along with 'copyprivate' clauses
2588   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2589     Action.Enter(CGF);
2590     OMPPrivateScope SingleScope(CGF);
2591     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2592     CGF.EmitOMPPrivateClause(S, SingleScope);
2593     (void)SingleScope.Privatize();
2594     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2595   };
2596   {
2597     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2598     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2599                                             CopyprivateVars, DestExprs,
2600                                             SrcExprs, AssignmentOps);
2601   }
2602   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2603   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2604   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2605     CGM.getOpenMPRuntime().emitBarrierCall(
2606         *this, S.getLocStart(),
2607         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2608   }
2609 }
2610 
2611 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2612   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2613     Action.Enter(CGF);
2614     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2615   };
2616   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2617   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2618 }
2619 
2620 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2621   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2622     Action.Enter(CGF);
2623     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2624   };
2625   Expr *Hint = nullptr;
2626   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2627     Hint = HintClause->getHint();
2628   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2629   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2630                                             S.getDirectiveName().getAsString(),
2631                                             CodeGen, S.getLocStart(), Hint);
2632 }
2633 
2634 void CodeGenFunction::EmitOMPParallelForDirective(
2635     const OMPParallelForDirective &S) {
2636   // Emit directive as a combined directive that consists of two implicit
2637   // directives: 'parallel' with 'for' directive.
2638   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2639     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2640     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2641                                emitDispatchForLoopBounds);
2642   };
2643   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2644                                  emitEmptyBoundParameters);
2645 }
2646 
2647 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2648     const OMPParallelForSimdDirective &S) {
2649   // Emit directive as a combined directive that consists of two implicit
2650   // directives: 'parallel' with 'for' directive.
2651   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2652     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2653                                emitDispatchForLoopBounds);
2654   };
2655   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2656                                  emitEmptyBoundParameters);
2657 }
2658 
2659 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2660     const OMPParallelSectionsDirective &S) {
2661   // Emit directive as a combined directive that consists of two implicit
2662   // directives: 'parallel' with 'sections' directive.
2663   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2664     CGF.EmitSections(S);
2665   };
2666   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2667                                  emitEmptyBoundParameters);
2668 }
2669 
2670 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2671                                                 const RegionCodeGenTy &BodyGen,
2672                                                 const TaskGenTy &TaskGen,
2673                                                 OMPTaskDataTy &Data) {
2674   // Emit outlined function for task construct.
2675   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2676   auto *I = CS->getCapturedDecl()->param_begin();
2677   auto *PartId = std::next(I);
2678   auto *TaskT = std::next(I, 4);
2679   // Check if the task is final
2680   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2681     // If the condition constant folds and can be elided, try to avoid emitting
2682     // the condition and the dead arm of the if/else.
2683     auto *Cond = Clause->getCondition();
2684     bool CondConstant;
2685     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2686       Data.Final.setInt(CondConstant);
2687     else
2688       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2689   } else {
2690     // By default the task is not final.
2691     Data.Final.setInt(/*IntVal=*/false);
2692   }
2693   // Check if the task has 'priority' clause.
2694   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2695     auto *Prio = Clause->getPriority();
2696     Data.Priority.setInt(/*IntVal=*/true);
2697     Data.Priority.setPointer(EmitScalarConversion(
2698         EmitScalarExpr(Prio), Prio->getType(),
2699         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2700         Prio->getExprLoc()));
2701   }
2702   // The first function argument for tasks is a thread id, the second one is a
2703   // part id (0 for tied tasks, >=0 for untied task).
2704   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2705   // Get list of private variables.
2706   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2707     auto IRef = C->varlist_begin();
2708     for (auto *IInit : C->private_copies()) {
2709       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2710       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2711         Data.PrivateVars.push_back(*IRef);
2712         Data.PrivateCopies.push_back(IInit);
2713       }
2714       ++IRef;
2715     }
2716   }
2717   EmittedAsPrivate.clear();
2718   // Get list of firstprivate variables.
2719   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2720     auto IRef = C->varlist_begin();
2721     auto IElemInitRef = C->inits().begin();
2722     for (auto *IInit : C->private_copies()) {
2723       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2724       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2725         Data.FirstprivateVars.push_back(*IRef);
2726         Data.FirstprivateCopies.push_back(IInit);
2727         Data.FirstprivateInits.push_back(*IElemInitRef);
2728       }
2729       ++IRef;
2730       ++IElemInitRef;
2731     }
2732   }
2733   // Get list of lastprivate variables (for taskloops).
2734   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2735   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2736     auto IRef = C->varlist_begin();
2737     auto ID = C->destination_exprs().begin();
2738     for (auto *IInit : C->private_copies()) {
2739       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2740       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2741         Data.LastprivateVars.push_back(*IRef);
2742         Data.LastprivateCopies.push_back(IInit);
2743       }
2744       LastprivateDstsOrigs.insert(
2745           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2746            cast<DeclRefExpr>(*IRef)});
2747       ++IRef;
2748       ++ID;
2749     }
2750   }
2751   SmallVector<const Expr *, 4> LHSs;
2752   SmallVector<const Expr *, 4> RHSs;
2753   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2754     auto IPriv = C->privates().begin();
2755     auto IRed = C->reduction_ops().begin();
2756     auto ILHS = C->lhs_exprs().begin();
2757     auto IRHS = C->rhs_exprs().begin();
2758     for (const auto *Ref : C->varlists()) {
2759       Data.ReductionVars.emplace_back(Ref);
2760       Data.ReductionCopies.emplace_back(*IPriv);
2761       Data.ReductionOps.emplace_back(*IRed);
2762       LHSs.emplace_back(*ILHS);
2763       RHSs.emplace_back(*IRHS);
2764       std::advance(IPriv, 1);
2765       std::advance(IRed, 1);
2766       std::advance(ILHS, 1);
2767       std::advance(IRHS, 1);
2768     }
2769   }
2770   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2771       *this, S.getLocStart(), LHSs, RHSs, Data);
2772   // Build list of dependences.
2773   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2774     for (auto *IRef : C->varlists())
2775       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2776   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2777       CodeGenFunction &CGF, PrePostActionTy &Action) {
2778     // Set proper addresses for generated private copies.
2779     OMPPrivateScope Scope(CGF);
2780     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2781         !Data.LastprivateVars.empty()) {
2782       enum { PrivatesParam = 2, CopyFnParam = 3 };
2783       auto *CopyFn = CGF.Builder.CreateLoad(
2784           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2785       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2786           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2787       // Map privates.
2788       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2789       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2790       CallArgs.push_back(PrivatesPtr);
2791       for (auto *E : Data.PrivateVars) {
2792         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2793         Address PrivatePtr = CGF.CreateMemTemp(
2794             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2795         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2796         CallArgs.push_back(PrivatePtr.getPointer());
2797       }
2798       for (auto *E : Data.FirstprivateVars) {
2799         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2800         Address PrivatePtr =
2801             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2802                               ".firstpriv.ptr.addr");
2803         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2804         CallArgs.push_back(PrivatePtr.getPointer());
2805       }
2806       for (auto *E : Data.LastprivateVars) {
2807         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2808         Address PrivatePtr =
2809             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2810                               ".lastpriv.ptr.addr");
2811         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2812         CallArgs.push_back(PrivatePtr.getPointer());
2813       }
2814       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2815                                                           CopyFn, CallArgs);
2816       for (auto &&Pair : LastprivateDstsOrigs) {
2817         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2818         DeclRefExpr DRE(
2819             const_cast<VarDecl *>(OrigVD),
2820             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2821                 OrigVD) != nullptr,
2822             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2823         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2824           return CGF.EmitLValue(&DRE).getAddress();
2825         });
2826       }
2827       for (auto &&Pair : PrivatePtrs) {
2828         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2829                             CGF.getContext().getDeclAlign(Pair.first));
2830         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2831       }
2832     }
2833     if (Data.Reductions) {
2834       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2835       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2836                              Data.ReductionOps);
2837       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2838           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2839       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2840         RedCG.emitSharedLValue(CGF, Cnt);
2841         RedCG.emitAggregateType(CGF, Cnt);
2842         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2843             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2844         Replacement =
2845             Address(CGF.EmitScalarConversion(
2846                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2847                         CGF.getContext().getPointerType(
2848                             Data.ReductionCopies[Cnt]->getType()),
2849                         SourceLocation()),
2850                     Replacement.getAlignment());
2851         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2852         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2853                          [Replacement]() { return Replacement; });
2854         // FIXME: This must removed once the runtime library is fixed.
2855         // Emit required threadprivate variables for
2856         // initilizer/combiner/finalizer.
2857         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2858                                                            RedCG, Cnt);
2859       }
2860     }
2861     // Privatize all private variables except for in_reduction items.
2862     (void)Scope.Privatize();
2863     SmallVector<const Expr *, 4> InRedVars;
2864     SmallVector<const Expr *, 4> InRedPrivs;
2865     SmallVector<const Expr *, 4> InRedOps;
2866     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2867     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2868       auto IPriv = C->privates().begin();
2869       auto IRed = C->reduction_ops().begin();
2870       auto ITD = C->taskgroup_descriptors().begin();
2871       for (const auto *Ref : C->varlists()) {
2872         InRedVars.emplace_back(Ref);
2873         InRedPrivs.emplace_back(*IPriv);
2874         InRedOps.emplace_back(*IRed);
2875         TaskgroupDescriptors.emplace_back(*ITD);
2876         std::advance(IPriv, 1);
2877         std::advance(IRed, 1);
2878         std::advance(ITD, 1);
2879       }
2880     }
2881     // Privatize in_reduction items here, because taskgroup descriptors must be
2882     // privatized earlier.
2883     OMPPrivateScope InRedScope(CGF);
2884     if (!InRedVars.empty()) {
2885       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2886       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2887         RedCG.emitSharedLValue(CGF, Cnt);
2888         RedCG.emitAggregateType(CGF, Cnt);
2889         // The taskgroup descriptor variable is always implicit firstprivate and
2890         // privatized already during procoessing of the firstprivates.
2891         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2892             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2893         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2894             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2895         Replacement = Address(
2896             CGF.EmitScalarConversion(
2897                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2898                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2899                 SourceLocation()),
2900             Replacement.getAlignment());
2901         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2902         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2903                               [Replacement]() { return Replacement; });
2904         // FIXME: This must removed once the runtime library is fixed.
2905         // Emit required threadprivate variables for
2906         // initilizer/combiner/finalizer.
2907         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2908                                                            RedCG, Cnt);
2909       }
2910     }
2911     (void)InRedScope.Privatize();
2912 
2913     Action.Enter(CGF);
2914     BodyGen(CGF);
2915   };
2916   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2917       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2918       Data.NumberOfParts);
2919   OMPLexicalScope Scope(*this, S);
2920   TaskGen(*this, OutlinedFn, Data);
2921 }
2922 
2923 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2924   // Emit outlined function for task construct.
2925   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2926   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2927   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2928   const Expr *IfCond = nullptr;
2929   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2930     if (C->getNameModifier() == OMPD_unknown ||
2931         C->getNameModifier() == OMPD_task) {
2932       IfCond = C->getCondition();
2933       break;
2934     }
2935   }
2936 
2937   OMPTaskDataTy Data;
2938   // Check if we should emit tied or untied task.
2939   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2940   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2941     CGF.EmitStmt(CS->getCapturedStmt());
2942   };
2943   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2944                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2945                             const OMPTaskDataTy &Data) {
2946     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2947                                             SharedsTy, CapturedStruct, IfCond,
2948                                             Data);
2949   };
2950   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2951 }
2952 
2953 void CodeGenFunction::EmitOMPTaskyieldDirective(
2954     const OMPTaskyieldDirective &S) {
2955   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2956 }
2957 
2958 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2959   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2960 }
2961 
2962 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2963   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2964 }
2965 
2966 void CodeGenFunction::EmitOMPTaskgroupDirective(
2967     const OMPTaskgroupDirective &S) {
2968   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2969     Action.Enter(CGF);
2970     if (const Expr *E = S.getReductionRef()) {
2971       SmallVector<const Expr *, 4> LHSs;
2972       SmallVector<const Expr *, 4> RHSs;
2973       OMPTaskDataTy Data;
2974       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2975         auto IPriv = C->privates().begin();
2976         auto IRed = C->reduction_ops().begin();
2977         auto ILHS = C->lhs_exprs().begin();
2978         auto IRHS = C->rhs_exprs().begin();
2979         for (const auto *Ref : C->varlists()) {
2980           Data.ReductionVars.emplace_back(Ref);
2981           Data.ReductionCopies.emplace_back(*IPriv);
2982           Data.ReductionOps.emplace_back(*IRed);
2983           LHSs.emplace_back(*ILHS);
2984           RHSs.emplace_back(*IRHS);
2985           std::advance(IPriv, 1);
2986           std::advance(IRed, 1);
2987           std::advance(ILHS, 1);
2988           std::advance(IRHS, 1);
2989         }
2990       }
2991       llvm::Value *ReductionDesc =
2992           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
2993                                                            LHSs, RHSs, Data);
2994       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2995       CGF.EmitVarDecl(*VD);
2996       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
2997                             /*Volatile=*/false, E->getType());
2998     }
2999     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3000   };
3001   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3002   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3003 }
3004 
3005 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3006   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3007     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3008       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3009                                 FlushClause->varlist_end());
3010     }
3011     return llvm::None;
3012   }(), S.getLocStart());
3013 }
3014 
3015 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3016                                             const CodeGenLoopTy &CodeGenLoop,
3017                                             Expr *IncExpr) {
3018   // Emit the loop iteration variable.
3019   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3020   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3021   EmitVarDecl(*IVDecl);
3022 
3023   // Emit the iterations count variable.
3024   // If it is not a variable, Sema decided to calculate iterations count on each
3025   // iteration (e.g., it is foldable into a constant).
3026   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3027     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3028     // Emit calculation of the iterations count.
3029     EmitIgnoredExpr(S.getCalcLastIteration());
3030   }
3031 
3032   auto &RT = CGM.getOpenMPRuntime();
3033 
3034   bool HasLastprivateClause = false;
3035   // Check pre-condition.
3036   {
3037     OMPLoopScope PreInitScope(*this, S);
3038     // Skip the entire loop if we don't meet the precondition.
3039     // If the condition constant folds and can be elided, avoid emitting the
3040     // whole loop.
3041     bool CondConstant;
3042     llvm::BasicBlock *ContBlock = nullptr;
3043     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3044       if (!CondConstant)
3045         return;
3046     } else {
3047       auto *ThenBlock = createBasicBlock("omp.precond.then");
3048       ContBlock = createBasicBlock("omp.precond.end");
3049       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3050                   getProfileCount(&S));
3051       EmitBlock(ThenBlock);
3052       incrementProfileCounter(&S);
3053     }
3054 
3055     // Emit 'then' code.
3056     {
3057       // Emit helper vars inits.
3058 
3059       LValue LB = EmitOMPHelperVar(
3060           *this, cast<DeclRefExpr>(
3061                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3062                           ? S.getCombinedLowerBoundVariable()
3063                           : S.getLowerBoundVariable())));
3064       LValue UB = EmitOMPHelperVar(
3065           *this, cast<DeclRefExpr>(
3066                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3067                           ? S.getCombinedUpperBoundVariable()
3068                           : S.getUpperBoundVariable())));
3069       LValue ST =
3070           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3071       LValue IL =
3072           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3073 
3074       OMPPrivateScope LoopScope(*this);
3075       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3076         // Emit implicit barrier to synchronize threads and avoid data races on
3077         // initialization of firstprivate variables and post-update of
3078         // lastprivate variables.
3079         CGM.getOpenMPRuntime().emitBarrierCall(
3080           *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3081           /*ForceSimpleCall=*/true);
3082       }
3083       EmitOMPPrivateClause(S, LoopScope);
3084       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3085       EmitOMPPrivateLoopCounters(S, LoopScope);
3086       (void)LoopScope.Privatize();
3087 
3088       // Detect the distribute schedule kind and chunk.
3089       llvm::Value *Chunk = nullptr;
3090       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3091       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3092         ScheduleKind = C->getDistScheduleKind();
3093         if (const auto *Ch = C->getChunkSize()) {
3094           Chunk = EmitScalarExpr(Ch);
3095           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3096           S.getIterationVariable()->getType(),
3097           S.getLocStart());
3098         }
3099       }
3100       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3101       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3102 
3103       // OpenMP [2.10.8, distribute Construct, Description]
3104       // If dist_schedule is specified, kind must be static. If specified,
3105       // iterations are divided into chunks of size chunk_size, chunks are
3106       // assigned to the teams of the league in a round-robin fashion in the
3107       // order of the team number. When no chunk_size is specified, the
3108       // iteration space is divided into chunks that are approximately equal
3109       // in size, and at most one chunk is distributed to each team of the
3110       // league. The size of the chunks is unspecified in this case.
3111       if (RT.isStaticNonchunked(ScheduleKind,
3112                                 /* Chunked */ Chunk != nullptr)) {
3113         CGOpenMPRuntime::StaticRTInput StaticInit(
3114             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3115             LB.getAddress(), UB.getAddress(), ST.getAddress());
3116         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3117                                     StaticInit);
3118         auto LoopExit =
3119             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3120         // UB = min(UB, GlobalUB);
3121         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3122                             ? S.getCombinedEnsureUpperBound()
3123                             : S.getEnsureUpperBound());
3124         // IV = LB;
3125         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3126                             ? S.getCombinedInit()
3127                             : S.getInit());
3128 
3129         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3130                          ? S.getCombinedCond()
3131                          : S.getCond();
3132 
3133         // for distribute alone,  codegen
3134         // while (idx <= UB) { BODY; ++idx; }
3135         // when combined with 'for' (e.g. as in 'distribute parallel for')
3136         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3137         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3138                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3139                            CodeGenLoop(CGF, S, LoopExit);
3140                          },
3141                          [](CodeGenFunction &) {});
3142         EmitBlock(LoopExit.getBlock());
3143         // Tell the runtime we are done.
3144         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3145       } else {
3146         // Emit the outer loop, which requests its work chunk [LB..UB] from
3147         // runtime and runs the inner loop to process it.
3148         const OMPLoopArguments LoopArguments = {
3149             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3150             Chunk};
3151         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3152                                    CodeGenLoop);
3153       }
3154 
3155       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3156       if (HasLastprivateClause)
3157         EmitOMPLastprivateClauseFinal(
3158             S, /*NoFinals=*/false,
3159             Builder.CreateIsNotNull(
3160                 EmitLoadOfScalar(IL, S.getLocStart())));
3161     }
3162 
3163     // We're now done with the loop, so jump to the continuation block.
3164     if (ContBlock) {
3165       EmitBranch(ContBlock);
3166       EmitBlock(ContBlock, true);
3167     }
3168   }
3169 }
3170 
3171 void CodeGenFunction::EmitOMPDistributeDirective(
3172     const OMPDistributeDirective &S) {
3173   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3174 
3175     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3176   };
3177   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3178   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3179                                               false);
3180 }
3181 
3182 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3183                                                    const CapturedStmt *S) {
3184   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3185   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3186   CGF.CapturedStmtInfo = &CapStmtInfo;
3187   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3188   Fn->addFnAttr(llvm::Attribute::NoInline);
3189   return Fn;
3190 }
3191 
3192 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3193   if (!S.getAssociatedStmt()) {
3194     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3195       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3196     return;
3197   }
3198   auto *C = S.getSingleClause<OMPSIMDClause>();
3199   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3200                                  PrePostActionTy &Action) {
3201     if (C) {
3202       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3203       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3204       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3205       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3206       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3207                                                       OutlinedFn, CapturedVars);
3208     } else {
3209       Action.Enter(CGF);
3210       CGF.EmitStmt(
3211           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3212     }
3213   };
3214   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3215   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3216 }
3217 
3218 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3219                                          QualType SrcType, QualType DestType,
3220                                          SourceLocation Loc) {
3221   assert(CGF.hasScalarEvaluationKind(DestType) &&
3222          "DestType must have scalar evaluation kind.");
3223   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3224   return Val.isScalar()
3225              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3226                                         Loc)
3227              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3228                                                  DestType, Loc);
3229 }
3230 
3231 static CodeGenFunction::ComplexPairTy
3232 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3233                       QualType DestType, SourceLocation Loc) {
3234   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3235          "DestType must have complex evaluation kind.");
3236   CodeGenFunction::ComplexPairTy ComplexVal;
3237   if (Val.isScalar()) {
3238     // Convert the input element to the element type of the complex.
3239     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3240     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3241                                               DestElementType, Loc);
3242     ComplexVal = CodeGenFunction::ComplexPairTy(
3243         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3244   } else {
3245     assert(Val.isComplex() && "Must be a scalar or complex.");
3246     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3247     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3248     ComplexVal.first = CGF.EmitScalarConversion(
3249         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3250     ComplexVal.second = CGF.EmitScalarConversion(
3251         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3252   }
3253   return ComplexVal;
3254 }
3255 
3256 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3257                                   LValue LVal, RValue RVal) {
3258   if (LVal.isGlobalReg()) {
3259     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3260   } else {
3261     CGF.EmitAtomicStore(RVal, LVal,
3262                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3263                                  : llvm::AtomicOrdering::Monotonic,
3264                         LVal.isVolatile(), /*IsInit=*/false);
3265   }
3266 }
3267 
3268 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3269                                          QualType RValTy, SourceLocation Loc) {
3270   switch (getEvaluationKind(LVal.getType())) {
3271   case TEK_Scalar:
3272     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3273                                *this, RVal, RValTy, LVal.getType(), Loc)),
3274                            LVal);
3275     break;
3276   case TEK_Complex:
3277     EmitStoreOfComplex(
3278         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3279         /*isInit=*/false);
3280     break;
3281   case TEK_Aggregate:
3282     llvm_unreachable("Must be a scalar or complex.");
3283   }
3284 }
3285 
3286 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3287                                   const Expr *X, const Expr *V,
3288                                   SourceLocation Loc) {
3289   // v = x;
3290   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3291   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3292   LValue XLValue = CGF.EmitLValue(X);
3293   LValue VLValue = CGF.EmitLValue(V);
3294   RValue Res = XLValue.isGlobalReg()
3295                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3296                    : CGF.EmitAtomicLoad(
3297                          XLValue, Loc,
3298                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3299                                   : llvm::AtomicOrdering::Monotonic,
3300                          XLValue.isVolatile());
3301   // OpenMP, 2.12.6, atomic Construct
3302   // Any atomic construct with a seq_cst clause forces the atomically
3303   // performed operation to include an implicit flush operation without a
3304   // list.
3305   if (IsSeqCst)
3306     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3307   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3308 }
3309 
3310 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3311                                    const Expr *X, const Expr *E,
3312                                    SourceLocation Loc) {
3313   // x = expr;
3314   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3315   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3316   // OpenMP, 2.12.6, atomic Construct
3317   // Any atomic construct with a seq_cst clause forces the atomically
3318   // performed operation to include an implicit flush operation without a
3319   // list.
3320   if (IsSeqCst)
3321     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3322 }
3323 
3324 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3325                                                 RValue Update,
3326                                                 BinaryOperatorKind BO,
3327                                                 llvm::AtomicOrdering AO,
3328                                                 bool IsXLHSInRHSPart) {
3329   auto &Context = CGF.CGM.getContext();
3330   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3331   // expression is simple and atomic is allowed for the given type for the
3332   // target platform.
3333   if (BO == BO_Comma || !Update.isScalar() ||
3334       !Update.getScalarVal()->getType()->isIntegerTy() ||
3335       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3336                         (Update.getScalarVal()->getType() !=
3337                          X.getAddress().getElementType())) ||
3338       !X.getAddress().getElementType()->isIntegerTy() ||
3339       !Context.getTargetInfo().hasBuiltinAtomic(
3340           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3341     return std::make_pair(false, RValue::get(nullptr));
3342 
3343   llvm::AtomicRMWInst::BinOp RMWOp;
3344   switch (BO) {
3345   case BO_Add:
3346     RMWOp = llvm::AtomicRMWInst::Add;
3347     break;
3348   case BO_Sub:
3349     if (!IsXLHSInRHSPart)
3350       return std::make_pair(false, RValue::get(nullptr));
3351     RMWOp = llvm::AtomicRMWInst::Sub;
3352     break;
3353   case BO_And:
3354     RMWOp = llvm::AtomicRMWInst::And;
3355     break;
3356   case BO_Or:
3357     RMWOp = llvm::AtomicRMWInst::Or;
3358     break;
3359   case BO_Xor:
3360     RMWOp = llvm::AtomicRMWInst::Xor;
3361     break;
3362   case BO_LT:
3363     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3364                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3365                                    : llvm::AtomicRMWInst::Max)
3366                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3367                                    : llvm::AtomicRMWInst::UMax);
3368     break;
3369   case BO_GT:
3370     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3371                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3372                                    : llvm::AtomicRMWInst::Min)
3373                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3374                                    : llvm::AtomicRMWInst::UMin);
3375     break;
3376   case BO_Assign:
3377     RMWOp = llvm::AtomicRMWInst::Xchg;
3378     break;
3379   case BO_Mul:
3380   case BO_Div:
3381   case BO_Rem:
3382   case BO_Shl:
3383   case BO_Shr:
3384   case BO_LAnd:
3385   case BO_LOr:
3386     return std::make_pair(false, RValue::get(nullptr));
3387   case BO_PtrMemD:
3388   case BO_PtrMemI:
3389   case BO_LE:
3390   case BO_GE:
3391   case BO_EQ:
3392   case BO_NE:
3393   case BO_AddAssign:
3394   case BO_SubAssign:
3395   case BO_AndAssign:
3396   case BO_OrAssign:
3397   case BO_XorAssign:
3398   case BO_MulAssign:
3399   case BO_DivAssign:
3400   case BO_RemAssign:
3401   case BO_ShlAssign:
3402   case BO_ShrAssign:
3403   case BO_Comma:
3404     llvm_unreachable("Unsupported atomic update operation");
3405   }
3406   auto *UpdateVal = Update.getScalarVal();
3407   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3408     UpdateVal = CGF.Builder.CreateIntCast(
3409         IC, X.getAddress().getElementType(),
3410         X.getType()->hasSignedIntegerRepresentation());
3411   }
3412   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3413   return std::make_pair(true, RValue::get(Res));
3414 }
3415 
3416 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3417     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3418     llvm::AtomicOrdering AO, SourceLocation Loc,
3419     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3420   // Update expressions are allowed to have the following forms:
3421   // x binop= expr; -> xrval + expr;
3422   // x++, ++x -> xrval + 1;
3423   // x--, --x -> xrval - 1;
3424   // x = x binop expr; -> xrval binop expr
3425   // x = expr Op x; - > expr binop xrval;
3426   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3427   if (!Res.first) {
3428     if (X.isGlobalReg()) {
3429       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3430       // 'xrval'.
3431       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3432     } else {
3433       // Perform compare-and-swap procedure.
3434       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3435     }
3436   }
3437   return Res;
3438 }
3439 
3440 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3441                                     const Expr *X, const Expr *E,
3442                                     const Expr *UE, bool IsXLHSInRHSPart,
3443                                     SourceLocation Loc) {
3444   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3445          "Update expr in 'atomic update' must be a binary operator.");
3446   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3447   // Update expressions are allowed to have the following forms:
3448   // x binop= expr; -> xrval + expr;
3449   // x++, ++x -> xrval + 1;
3450   // x--, --x -> xrval - 1;
3451   // x = x binop expr; -> xrval binop expr
3452   // x = expr Op x; - > expr binop xrval;
3453   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3454   LValue XLValue = CGF.EmitLValue(X);
3455   RValue ExprRValue = CGF.EmitAnyExpr(E);
3456   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3457                      : llvm::AtomicOrdering::Monotonic;
3458   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3459   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3460   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3461   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3462   auto Gen =
3463       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3464         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3465         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3466         return CGF.EmitAnyExpr(UE);
3467       };
3468   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3469       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3470   // OpenMP, 2.12.6, atomic Construct
3471   // Any atomic construct with a seq_cst clause forces the atomically
3472   // performed operation to include an implicit flush operation without a
3473   // list.
3474   if (IsSeqCst)
3475     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3476 }
3477 
3478 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3479                             QualType SourceType, QualType ResType,
3480                             SourceLocation Loc) {
3481   switch (CGF.getEvaluationKind(ResType)) {
3482   case TEK_Scalar:
3483     return RValue::get(
3484         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3485   case TEK_Complex: {
3486     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3487     return RValue::getComplex(Res.first, Res.second);
3488   }
3489   case TEK_Aggregate:
3490     break;
3491   }
3492   llvm_unreachable("Must be a scalar or complex.");
3493 }
3494 
3495 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3496                                      bool IsPostfixUpdate, const Expr *V,
3497                                      const Expr *X, const Expr *E,
3498                                      const Expr *UE, bool IsXLHSInRHSPart,
3499                                      SourceLocation Loc) {
3500   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3501   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3502   RValue NewVVal;
3503   LValue VLValue = CGF.EmitLValue(V);
3504   LValue XLValue = CGF.EmitLValue(X);
3505   RValue ExprRValue = CGF.EmitAnyExpr(E);
3506   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3507                      : llvm::AtomicOrdering::Monotonic;
3508   QualType NewVValType;
3509   if (UE) {
3510     // 'x' is updated with some additional value.
3511     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3512            "Update expr in 'atomic capture' must be a binary operator.");
3513     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3514     // Update expressions are allowed to have the following forms:
3515     // x binop= expr; -> xrval + expr;
3516     // x++, ++x -> xrval + 1;
3517     // x--, --x -> xrval - 1;
3518     // x = x binop expr; -> xrval binop expr
3519     // x = expr Op x; - > expr binop xrval;
3520     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3521     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3522     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3523     NewVValType = XRValExpr->getType();
3524     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3525     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3526                   IsPostfixUpdate](RValue XRValue) -> RValue {
3527       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3528       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3529       RValue Res = CGF.EmitAnyExpr(UE);
3530       NewVVal = IsPostfixUpdate ? XRValue : Res;
3531       return Res;
3532     };
3533     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3534         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3535     if (Res.first) {
3536       // 'atomicrmw' instruction was generated.
3537       if (IsPostfixUpdate) {
3538         // Use old value from 'atomicrmw'.
3539         NewVVal = Res.second;
3540       } else {
3541         // 'atomicrmw' does not provide new value, so evaluate it using old
3542         // value of 'x'.
3543         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3544         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3545         NewVVal = CGF.EmitAnyExpr(UE);
3546       }
3547     }
3548   } else {
3549     // 'x' is simply rewritten with some 'expr'.
3550     NewVValType = X->getType().getNonReferenceType();
3551     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3552                                X->getType().getNonReferenceType(), Loc);
3553     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3554       NewVVal = XRValue;
3555       return ExprRValue;
3556     };
3557     // Try to perform atomicrmw xchg, otherwise simple exchange.
3558     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3559         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3560         Loc, Gen);
3561     if (Res.first) {
3562       // 'atomicrmw' instruction was generated.
3563       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3564     }
3565   }
3566   // Emit post-update store to 'v' of old/new 'x' value.
3567   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3568   // OpenMP, 2.12.6, atomic Construct
3569   // Any atomic construct with a seq_cst clause forces the atomically
3570   // performed operation to include an implicit flush operation without a
3571   // list.
3572   if (IsSeqCst)
3573     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3574 }
3575 
3576 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3577                               bool IsSeqCst, bool IsPostfixUpdate,
3578                               const Expr *X, const Expr *V, const Expr *E,
3579                               const Expr *UE, bool IsXLHSInRHSPart,
3580                               SourceLocation Loc) {
3581   switch (Kind) {
3582   case OMPC_read:
3583     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3584     break;
3585   case OMPC_write:
3586     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3587     break;
3588   case OMPC_unknown:
3589   case OMPC_update:
3590     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3591     break;
3592   case OMPC_capture:
3593     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3594                              IsXLHSInRHSPart, Loc);
3595     break;
3596   case OMPC_if:
3597   case OMPC_final:
3598   case OMPC_num_threads:
3599   case OMPC_private:
3600   case OMPC_firstprivate:
3601   case OMPC_lastprivate:
3602   case OMPC_reduction:
3603   case OMPC_task_reduction:
3604   case OMPC_in_reduction:
3605   case OMPC_safelen:
3606   case OMPC_simdlen:
3607   case OMPC_collapse:
3608   case OMPC_default:
3609   case OMPC_seq_cst:
3610   case OMPC_shared:
3611   case OMPC_linear:
3612   case OMPC_aligned:
3613   case OMPC_copyin:
3614   case OMPC_copyprivate:
3615   case OMPC_flush:
3616   case OMPC_proc_bind:
3617   case OMPC_schedule:
3618   case OMPC_ordered:
3619   case OMPC_nowait:
3620   case OMPC_untied:
3621   case OMPC_threadprivate:
3622   case OMPC_depend:
3623   case OMPC_mergeable:
3624   case OMPC_device:
3625   case OMPC_threads:
3626   case OMPC_simd:
3627   case OMPC_map:
3628   case OMPC_num_teams:
3629   case OMPC_thread_limit:
3630   case OMPC_priority:
3631   case OMPC_grainsize:
3632   case OMPC_nogroup:
3633   case OMPC_num_tasks:
3634   case OMPC_hint:
3635   case OMPC_dist_schedule:
3636   case OMPC_defaultmap:
3637   case OMPC_uniform:
3638   case OMPC_to:
3639   case OMPC_from:
3640   case OMPC_use_device_ptr:
3641   case OMPC_is_device_ptr:
3642     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3643   }
3644 }
3645 
3646 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3647   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3648   OpenMPClauseKind Kind = OMPC_unknown;
3649   for (auto *C : S.clauses()) {
3650     // Find first clause (skip seq_cst clause, if it is first).
3651     if (C->getClauseKind() != OMPC_seq_cst) {
3652       Kind = C->getClauseKind();
3653       break;
3654     }
3655   }
3656 
3657   const auto *CS =
3658       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3659   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3660     enterFullExpression(EWC);
3661   }
3662   // Processing for statements under 'atomic capture'.
3663   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3664     for (const auto *C : Compound->body()) {
3665       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3666         enterFullExpression(EWC);
3667       }
3668     }
3669   }
3670 
3671   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3672                                             PrePostActionTy &) {
3673     CGF.EmitStopPoint(CS);
3674     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3675                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3676                       S.isXLHSInRHSPart(), S.getLocStart());
3677   };
3678   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3679   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3680 }
3681 
3682 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3683                                          const OMPExecutableDirective &S,
3684                                          const RegionCodeGenTy &CodeGen) {
3685   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3686   CodeGenModule &CGM = CGF.CGM;
3687   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3688 
3689   llvm::Function *Fn = nullptr;
3690   llvm::Constant *FnID = nullptr;
3691 
3692   const Expr *IfCond = nullptr;
3693   // Check for the at most one if clause associated with the target region.
3694   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3695     if (C->getNameModifier() == OMPD_unknown ||
3696         C->getNameModifier() == OMPD_target) {
3697       IfCond = C->getCondition();
3698       break;
3699     }
3700   }
3701 
3702   // Check if we have any device clause associated with the directive.
3703   const Expr *Device = nullptr;
3704   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3705     Device = C->getDevice();
3706   }
3707 
3708   // Check if we have an if clause whose conditional always evaluates to false
3709   // or if we do not have any targets specified. If so the target region is not
3710   // an offload entry point.
3711   bool IsOffloadEntry = true;
3712   if (IfCond) {
3713     bool Val;
3714     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3715       IsOffloadEntry = false;
3716   }
3717   if (CGM.getLangOpts().OMPTargetTriples.empty())
3718     IsOffloadEntry = false;
3719 
3720   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3721   StringRef ParentName;
3722   // In case we have Ctors/Dtors we use the complete type variant to produce
3723   // the mangling of the device outlined kernel.
3724   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3725     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3726   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3727     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3728   else
3729     ParentName =
3730         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3731 
3732   // Emit target region as a standalone region.
3733   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3734                                                     IsOffloadEntry, CodeGen);
3735   OMPLexicalScope Scope(CGF, S);
3736   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3737   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3738   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3739                                         CapturedVars);
3740 }
3741 
3742 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3743                              PrePostActionTy &Action) {
3744   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3745   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3746   CGF.EmitOMPPrivateClause(S, PrivateScope);
3747   (void)PrivateScope.Privatize();
3748 
3749   Action.Enter(CGF);
3750   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3751 }
3752 
3753 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3754                                                   StringRef ParentName,
3755                                                   const OMPTargetDirective &S) {
3756   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3757     emitTargetRegion(CGF, S, Action);
3758   };
3759   llvm::Function *Fn;
3760   llvm::Constant *Addr;
3761   // Emit target region as a standalone region.
3762   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3763       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3764   assert(Fn && Addr && "Target device function emission failed.");
3765 }
3766 
3767 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3768   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3769     emitTargetRegion(CGF, S, Action);
3770   };
3771   emitCommonOMPTargetDirective(*this, S, CodeGen);
3772 }
3773 
3774 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3775                                         const OMPExecutableDirective &S,
3776                                         OpenMPDirectiveKind InnermostKind,
3777                                         const RegionCodeGenTy &CodeGen) {
3778   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3779   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3780       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3781 
3782   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3783   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3784   if (NT || TL) {
3785     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3786     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3787 
3788     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3789                                                   S.getLocStart());
3790   }
3791 
3792   OMPTeamsScope Scope(CGF, S);
3793   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3794   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3795   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3796                                            CapturedVars);
3797 }
3798 
3799 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3800   // Emit teams region as a standalone region.
3801   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3802     OMPPrivateScope PrivateScope(CGF);
3803     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3804     CGF.EmitOMPPrivateClause(S, PrivateScope);
3805     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3806     (void)PrivateScope.Privatize();
3807     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3808     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3809   };
3810   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3811   emitPostUpdateForReductionClause(
3812       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3813 }
3814 
3815 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3816                                   const OMPTargetTeamsDirective &S) {
3817   auto *CS = S.getCapturedStmt(OMPD_teams);
3818   Action.Enter(CGF);
3819   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3820     // TODO: Add support for clauses.
3821     CGF.EmitStmt(CS->getCapturedStmt());
3822   };
3823   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3824 }
3825 
3826 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3827     CodeGenModule &CGM, StringRef ParentName,
3828     const OMPTargetTeamsDirective &S) {
3829   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3830     emitTargetTeamsRegion(CGF, Action, S);
3831   };
3832   llvm::Function *Fn;
3833   llvm::Constant *Addr;
3834   // Emit target region as a standalone region.
3835   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3836       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3837   assert(Fn && Addr && "Target device function emission failed.");
3838 }
3839 
3840 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3841     const OMPTargetTeamsDirective &S) {
3842   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3843     emitTargetTeamsRegion(CGF, Action, S);
3844   };
3845   emitCommonOMPTargetDirective(*this, S, CodeGen);
3846 }
3847 
3848 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
3849     const OMPTeamsDistributeDirective &S) {
3850 
3851   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3852     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3853   };
3854 
3855   // Emit teams region as a standalone region.
3856   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3857                                             PrePostActionTy &) {
3858     OMPPrivateScope PrivateScope(CGF);
3859     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3860     (void)PrivateScope.Privatize();
3861     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3862                                                     CodeGenDistribute);
3863     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3864   };
3865   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3866   emitPostUpdateForReductionClause(*this, S,
3867                                    [](CodeGenFunction &) { return nullptr; });
3868 }
3869 
3870 void CodeGenFunction::EmitOMPCancellationPointDirective(
3871     const OMPCancellationPointDirective &S) {
3872   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3873                                                    S.getCancelRegion());
3874 }
3875 
3876 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3877   const Expr *IfCond = nullptr;
3878   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3879     if (C->getNameModifier() == OMPD_unknown ||
3880         C->getNameModifier() == OMPD_cancel) {
3881       IfCond = C->getCondition();
3882       break;
3883     }
3884   }
3885   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3886                                         S.getCancelRegion());
3887 }
3888 
3889 CodeGenFunction::JumpDest
3890 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3891   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3892       Kind == OMPD_target_parallel)
3893     return ReturnBlock;
3894   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3895          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3896          Kind == OMPD_distribute_parallel_for ||
3897          Kind == OMPD_target_parallel_for);
3898   return OMPCancelStack.getExitBlock();
3899 }
3900 
3901 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3902     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3903     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3904   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3905   auto OrigVarIt = C.varlist_begin();
3906   auto InitIt = C.inits().begin();
3907   for (auto PvtVarIt : C.private_copies()) {
3908     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3909     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3910     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3911 
3912     // In order to identify the right initializer we need to match the
3913     // declaration used by the mapping logic. In some cases we may get
3914     // OMPCapturedExprDecl that refers to the original declaration.
3915     const ValueDecl *MatchingVD = OrigVD;
3916     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3917       // OMPCapturedExprDecl are used to privative fields of the current
3918       // structure.
3919       auto *ME = cast<MemberExpr>(OED->getInit());
3920       assert(isa<CXXThisExpr>(ME->getBase()) &&
3921              "Base should be the current struct!");
3922       MatchingVD = ME->getMemberDecl();
3923     }
3924 
3925     // If we don't have information about the current list item, move on to
3926     // the next one.
3927     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3928     if (InitAddrIt == CaptureDeviceAddrMap.end())
3929       continue;
3930 
3931     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3932       // Initialize the temporary initialization variable with the address we
3933       // get from the runtime library. We have to cast the source address
3934       // because it is always a void *. References are materialized in the
3935       // privatization scope, so the initialization here disregards the fact
3936       // the original variable is a reference.
3937       QualType AddrQTy =
3938           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3939       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3940       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3941       setAddrOfLocalVar(InitVD, InitAddr);
3942 
3943       // Emit private declaration, it will be initialized by the value we
3944       // declaration we just added to the local declarations map.
3945       EmitDecl(*PvtVD);
3946 
3947       // The initialization variables reached its purpose in the emission
3948       // ofthe previous declaration, so we don't need it anymore.
3949       LocalDeclMap.erase(InitVD);
3950 
3951       // Return the address of the private variable.
3952       return GetAddrOfLocalVar(PvtVD);
3953     });
3954     assert(IsRegistered && "firstprivate var already registered as private");
3955     // Silence the warning about unused variable.
3956     (void)IsRegistered;
3957 
3958     ++OrigVarIt;
3959     ++InitIt;
3960   }
3961 }
3962 
3963 // Generate the instructions for '#pragma omp target data' directive.
3964 void CodeGenFunction::EmitOMPTargetDataDirective(
3965     const OMPTargetDataDirective &S) {
3966   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
3967 
3968   // Create a pre/post action to signal the privatization of the device pointer.
3969   // This action can be replaced by the OpenMP runtime code generation to
3970   // deactivate privatization.
3971   bool PrivatizeDevicePointers = false;
3972   class DevicePointerPrivActionTy : public PrePostActionTy {
3973     bool &PrivatizeDevicePointers;
3974 
3975   public:
3976     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
3977         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
3978     void Enter(CodeGenFunction &CGF) override {
3979       PrivatizeDevicePointers = true;
3980     }
3981   };
3982   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
3983 
3984   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
3985       CodeGenFunction &CGF, PrePostActionTy &Action) {
3986     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3987       CGF.EmitStmt(
3988           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3989     };
3990 
3991     // Codegen that selects wheather to generate the privatization code or not.
3992     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
3993                           &InnermostCodeGen](CodeGenFunction &CGF,
3994                                              PrePostActionTy &Action) {
3995       RegionCodeGenTy RCG(InnermostCodeGen);
3996       PrivatizeDevicePointers = false;
3997 
3998       // Call the pre-action to change the status of PrivatizeDevicePointers if
3999       // needed.
4000       Action.Enter(CGF);
4001 
4002       if (PrivatizeDevicePointers) {
4003         OMPPrivateScope PrivateScope(CGF);
4004         // Emit all instances of the use_device_ptr clause.
4005         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
4006           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
4007                                         Info.CaptureDeviceAddrMap);
4008         (void)PrivateScope.Privatize();
4009         RCG(CGF);
4010       } else
4011         RCG(CGF);
4012     };
4013 
4014     // Forward the provided action to the privatization codegen.
4015     RegionCodeGenTy PrivRCG(PrivCodeGen);
4016     PrivRCG.setAction(Action);
4017 
4018     // Notwithstanding the body of the region is emitted as inlined directive,
4019     // we don't use an inline scope as changes in the references inside the
4020     // region are expected to be visible outside, so we do not privative them.
4021     OMPLexicalScope Scope(CGF, S);
4022     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
4023                                                     PrivRCG);
4024   };
4025 
4026   RegionCodeGenTy RCG(CodeGen);
4027 
4028   // If we don't have target devices, don't bother emitting the data mapping
4029   // code.
4030   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
4031     RCG(*this);
4032     return;
4033   }
4034 
4035   // Check if we have any if clause associated with the directive.
4036   const Expr *IfCond = nullptr;
4037   if (auto *C = S.getSingleClause<OMPIfClause>())
4038     IfCond = C->getCondition();
4039 
4040   // Check if we have any device clause associated with the directive.
4041   const Expr *Device = nullptr;
4042   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4043     Device = C->getDevice();
4044 
4045   // Set the action to signal privatization of device pointers.
4046   RCG.setAction(PrivAction);
4047 
4048   // Emit region code.
4049   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4050                                              Info);
4051 }
4052 
4053 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4054     const OMPTargetEnterDataDirective &S) {
4055   // If we don't have target devices, don't bother emitting the data mapping
4056   // code.
4057   if (CGM.getLangOpts().OMPTargetTriples.empty())
4058     return;
4059 
4060   // Check if we have any if clause associated with the directive.
4061   const Expr *IfCond = nullptr;
4062   if (auto *C = S.getSingleClause<OMPIfClause>())
4063     IfCond = C->getCondition();
4064 
4065   // Check if we have any device clause associated with the directive.
4066   const Expr *Device = nullptr;
4067   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4068     Device = C->getDevice();
4069 
4070   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4071 }
4072 
4073 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4074     const OMPTargetExitDataDirective &S) {
4075   // If we don't have target devices, don't bother emitting the data mapping
4076   // code.
4077   if (CGM.getLangOpts().OMPTargetTriples.empty())
4078     return;
4079 
4080   // Check if we have any if clause associated with the directive.
4081   const Expr *IfCond = nullptr;
4082   if (auto *C = S.getSingleClause<OMPIfClause>())
4083     IfCond = C->getCondition();
4084 
4085   // Check if we have any device clause associated with the directive.
4086   const Expr *Device = nullptr;
4087   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4088     Device = C->getDevice();
4089 
4090   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4091 }
4092 
4093 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4094                                      const OMPTargetParallelDirective &S,
4095                                      PrePostActionTy &Action) {
4096   // Get the captured statement associated with the 'parallel' region.
4097   auto *CS = S.getCapturedStmt(OMPD_parallel);
4098   Action.Enter(CGF);
4099   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4100     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4101     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4102     CGF.EmitOMPPrivateClause(S, PrivateScope);
4103     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4104     (void)PrivateScope.Privatize();
4105     // TODO: Add support for clauses.
4106     CGF.EmitStmt(CS->getCapturedStmt());
4107     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4108   };
4109   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4110                                  emitEmptyBoundParameters);
4111   emitPostUpdateForReductionClause(
4112       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4113 }
4114 
4115 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4116     CodeGenModule &CGM, StringRef ParentName,
4117     const OMPTargetParallelDirective &S) {
4118   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4119     emitTargetParallelRegion(CGF, S, Action);
4120   };
4121   llvm::Function *Fn;
4122   llvm::Constant *Addr;
4123   // Emit target region as a standalone region.
4124   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4125       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4126   assert(Fn && Addr && "Target device function emission failed.");
4127 }
4128 
4129 void CodeGenFunction::EmitOMPTargetParallelDirective(
4130     const OMPTargetParallelDirective &S) {
4131   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4132     emitTargetParallelRegion(CGF, S, Action);
4133   };
4134   emitCommonOMPTargetDirective(*this, S, CodeGen);
4135 }
4136 
4137 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4138     const OMPTargetParallelForDirective &S) {
4139   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4140   CGM.getOpenMPRuntime().emitInlinedDirective(
4141       *this, OMPD_target_parallel_for,
4142       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4143         OMPLoopScope PreInitScope(CGF, S);
4144         CGF.EmitStmt(
4145             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
4146       });
4147 }
4148 
4149 /// Emit a helper variable and return corresponding lvalue.
4150 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4151                      const ImplicitParamDecl *PVD,
4152                      CodeGenFunction::OMPPrivateScope &Privates) {
4153   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4154   Privates.addPrivate(
4155       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4156 }
4157 
4158 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4159   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4160   // Emit outlined function for task construct.
4161   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
4162   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4163   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4164   const Expr *IfCond = nullptr;
4165   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4166     if (C->getNameModifier() == OMPD_unknown ||
4167         C->getNameModifier() == OMPD_taskloop) {
4168       IfCond = C->getCondition();
4169       break;
4170     }
4171   }
4172 
4173   OMPTaskDataTy Data;
4174   // Check if taskloop must be emitted without taskgroup.
4175   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4176   // TODO: Check if we should emit tied or untied task.
4177   Data.Tied = true;
4178   // Set scheduling for taskloop
4179   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4180     // grainsize clause
4181     Data.Schedule.setInt(/*IntVal=*/false);
4182     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4183   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4184     // num_tasks clause
4185     Data.Schedule.setInt(/*IntVal=*/true);
4186     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4187   }
4188 
4189   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4190     // if (PreCond) {
4191     //   for (IV in 0..LastIteration) BODY;
4192     //   <Final counter/linear vars updates>;
4193     // }
4194     //
4195 
4196     // Emit: if (PreCond) - begin.
4197     // If the condition constant folds and can be elided, avoid emitting the
4198     // whole loop.
4199     bool CondConstant;
4200     llvm::BasicBlock *ContBlock = nullptr;
4201     OMPLoopScope PreInitScope(CGF, S);
4202     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4203       if (!CondConstant)
4204         return;
4205     } else {
4206       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4207       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4208       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4209                   CGF.getProfileCount(&S));
4210       CGF.EmitBlock(ThenBlock);
4211       CGF.incrementProfileCounter(&S);
4212     }
4213 
4214     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4215       CGF.EmitOMPSimdInit(S);
4216 
4217     OMPPrivateScope LoopScope(CGF);
4218     // Emit helper vars inits.
4219     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4220     auto *I = CS->getCapturedDecl()->param_begin();
4221     auto *LBP = std::next(I, LowerBound);
4222     auto *UBP = std::next(I, UpperBound);
4223     auto *STP = std::next(I, Stride);
4224     auto *LIP = std::next(I, LastIter);
4225     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4226              LoopScope);
4227     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4228              LoopScope);
4229     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4230     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4231              LoopScope);
4232     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4233     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4234     (void)LoopScope.Privatize();
4235     // Emit the loop iteration variable.
4236     const Expr *IVExpr = S.getIterationVariable();
4237     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4238     CGF.EmitVarDecl(*IVDecl);
4239     CGF.EmitIgnoredExpr(S.getInit());
4240 
4241     // Emit the iterations count variable.
4242     // If it is not a variable, Sema decided to calculate iterations count on
4243     // each iteration (e.g., it is foldable into a constant).
4244     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4245       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4246       // Emit calculation of the iterations count.
4247       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4248     }
4249 
4250     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4251                          S.getInc(),
4252                          [&S](CodeGenFunction &CGF) {
4253                            CGF.EmitOMPLoopBody(S, JumpDest());
4254                            CGF.EmitStopPoint(&S);
4255                          },
4256                          [](CodeGenFunction &) {});
4257     // Emit: if (PreCond) - end.
4258     if (ContBlock) {
4259       CGF.EmitBranch(ContBlock);
4260       CGF.EmitBlock(ContBlock, true);
4261     }
4262     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4263     if (HasLastprivateClause) {
4264       CGF.EmitOMPLastprivateClauseFinal(
4265           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4266           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4267               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4268               (*LIP)->getType(), S.getLocStart())));
4269     }
4270   };
4271   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4272                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4273                             const OMPTaskDataTy &Data) {
4274     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4275       OMPLoopScope PreInitScope(CGF, S);
4276       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4277                                                   OutlinedFn, SharedsTy,
4278                                                   CapturedStruct, IfCond, Data);
4279     };
4280     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4281                                                     CodeGen);
4282   };
4283   if (Data.Nogroup)
4284     EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4285   else {
4286     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4287         *this,
4288         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4289                                         PrePostActionTy &Action) {
4290           Action.Enter(CGF);
4291           CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4292         },
4293         S.getLocStart());
4294   }
4295 }
4296 
4297 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4298   EmitOMPTaskLoopBasedDirective(S);
4299 }
4300 
4301 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4302     const OMPTaskLoopSimdDirective &S) {
4303   EmitOMPTaskLoopBasedDirective(S);
4304 }
4305 
4306 // Generate the instructions for '#pragma omp target update' directive.
4307 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4308     const OMPTargetUpdateDirective &S) {
4309   // If we don't have target devices, don't bother emitting the data mapping
4310   // code.
4311   if (CGM.getLangOpts().OMPTargetTriples.empty())
4312     return;
4313 
4314   // Check if we have any if clause associated with the directive.
4315   const Expr *IfCond = nullptr;
4316   if (auto *C = S.getSingleClause<OMPIfClause>())
4317     IfCond = C->getCondition();
4318 
4319   // Check if we have any device clause associated with the directive.
4320   const Expr *Device = nullptr;
4321   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4322     Device = C->getDevice();
4323 
4324   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4325 }
4326