1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             assert(VD == VD->getCanonicalDecl() &&
69                         "Canonical decl must be captured.");
70             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
71                             isCapturedVar(CGF, VD) ||
72                                 (CGF.CapturedStmtInfo &&
73                                  InlinedShareds.isGlobalVarCaptured(VD)),
74                             VD->getType().getNonReferenceType(), VK_LValue,
75                             SourceLocation());
76             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
77               return CGF.EmitLValue(&DRE).getAddress();
78             });
79           }
80         }
81         (void)InlinedShareds.Privatize();
82       }
83     }
84   }
85 };
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S,
100                         /*AsInlined=*/false,
101                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S,
116                         /*AsInlined=*/false,
117                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 };
119 
120 /// Private scope for OpenMP loop-based directives, that supports capturing
121 /// of used expression from loop statement.
122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
123   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
124     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
125       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
126         for (const auto *I : PreInits->decls())
127           CGF.EmitVarDecl(cast<VarDecl>(*I));
128       }
129     }
130   }
131 
132 public:
133   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
134       : CodeGenFunction::RunCleanupsScope(CGF) {
135     emitPreInitStmt(CGF, S);
136   }
137 };
138 
139 } // namespace
140 
141 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
142   auto &C = getContext();
143   llvm::Value *Size = nullptr;
144   auto SizeInChars = C.getTypeSizeInChars(Ty);
145   if (SizeInChars.isZero()) {
146     // getTypeSizeInChars() returns 0 for a VLA.
147     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
148       llvm::Value *ArraySize;
149       std::tie(ArraySize, Ty) = getVLASize(VAT);
150       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
151     }
152     SizeInChars = C.getTypeSizeInChars(Ty);
153     if (SizeInChars.isZero())
154       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
155     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
156   } else
157     Size = CGM.getSize(SizeInChars);
158   return Size;
159 }
160 
161 void CodeGenFunction::GenerateOpenMPCapturedVars(
162     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
163   const RecordDecl *RD = S.getCapturedRecordDecl();
164   auto CurField = RD->field_begin();
165   auto CurCap = S.captures().begin();
166   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
167                                                  E = S.capture_init_end();
168        I != E; ++I, ++CurField, ++CurCap) {
169     if (CurField->hasCapturedVLAType()) {
170       auto VAT = CurField->getCapturedVLAType();
171       auto *Val = VLASizeMap[VAT->getSizeExpr()];
172       CapturedVars.push_back(Val);
173     } else if (CurCap->capturesThis())
174       CapturedVars.push_back(CXXThisValue);
175     else if (CurCap->capturesVariableByCopy()) {
176       llvm::Value *CV =
177           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
178 
179       // If the field is not a pointer, we need to save the actual value
180       // and load it as a void pointer.
181       if (!CurField->getType()->isAnyPointerType()) {
182         auto &Ctx = getContext();
183         auto DstAddr = CreateMemTemp(
184             Ctx.getUIntPtrType(),
185             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
186         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
187 
188         auto *SrcAddrVal = EmitScalarConversion(
189             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
190             Ctx.getPointerType(CurField->getType()), SourceLocation());
191         LValue SrcLV =
192             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
193 
194         // Store the value using the source type pointer.
195         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
196 
197         // Load the value using the destination type pointer.
198         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
199       }
200       CapturedVars.push_back(CV);
201     } else {
202       assert(CurCap->capturesVariable() && "Expected capture by reference.");
203       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
204     }
205   }
206 }
207 
208 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
209                                     StringRef Name, LValue AddrLV,
210                                     bool isReferenceType = false) {
211   ASTContext &Ctx = CGF.getContext();
212 
213   auto *CastedPtr = CGF.EmitScalarConversion(
214       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
215       Ctx.getPointerType(DstType), SourceLocation());
216   auto TmpAddr =
217       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
218           .getAddress();
219 
220   // If we are dealing with references we need to return the address of the
221   // reference instead of the reference of the value.
222   if (isReferenceType) {
223     QualType RefType = Ctx.getLValueReferenceType(DstType);
224     auto *RefVal = TmpAddr.getPointer();
225     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
226     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
227     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
228   }
229 
230   return TmpAddr;
231 }
232 
233 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
234   if (T->isLValueReferenceType()) {
235     return C.getLValueReferenceType(
236         getCanonicalParamType(C, T.getNonReferenceType()),
237         /*SpelledAsLValue=*/false);
238   }
239   if (T->isPointerType())
240     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
241   return C.getCanonicalParamType(T);
242 }
243 
244 namespace {
245   /// Contains required data for proper outlined function codegen.
246   struct FunctionOptions {
247     /// Captured statement for which the function is generated.
248     const CapturedStmt *S = nullptr;
249     /// true if cast to/from  UIntPtr is required for variables captured by
250     /// value.
251     const bool UIntPtrCastRequired = true;
252     /// true if only casted arguments must be registered as local args or VLA
253     /// sizes.
254     const bool RegisterCastedArgsOnly = false;
255     /// Name of the generated function.
256     const StringRef FunctionName;
257     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
258                              bool RegisterCastedArgsOnly,
259                              StringRef FunctionName)
260         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
261           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
262           FunctionName(FunctionName) {}
263   };
264 }
265 
266 static llvm::Function *emitOutlinedFunctionPrologue(
267     CodeGenFunction &CGF, FunctionArgList &Args,
268     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
269         &LocalAddrs,
270     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
271         &VLASizes,
272     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
273   const CapturedDecl *CD = FO.S->getCapturedDecl();
274   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
275   assert(CD->hasBody() && "missing CapturedDecl body");
276 
277   CXXThisValue = nullptr;
278   // Build the argument list.
279   CodeGenModule &CGM = CGF.CGM;
280   ASTContext &Ctx = CGM.getContext();
281   FunctionArgList TargetArgs;
282   Args.append(CD->param_begin(),
283               std::next(CD->param_begin(), CD->getContextParamPosition()));
284   TargetArgs.append(
285       CD->param_begin(),
286       std::next(CD->param_begin(), CD->getContextParamPosition()));
287   auto I = FO.S->captures().begin();
288   for (auto *FD : RD->fields()) {
289     QualType ArgType = FD->getType();
290     IdentifierInfo *II = nullptr;
291     VarDecl *CapVar = nullptr;
292 
293     // If this is a capture by copy and the type is not a pointer, the outlined
294     // function argument type should be uintptr and the value properly casted to
295     // uintptr. This is necessary given that the runtime library is only able to
296     // deal with pointers. We can pass in the same way the VLA type sizes to the
297     // outlined function.
298     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
299         I->capturesVariableArrayType()) {
300       if (FO.UIntPtrCastRequired)
301         ArgType = Ctx.getUIntPtrType();
302     }
303 
304     if (I->capturesVariable() || I->capturesVariableByCopy()) {
305       CapVar = I->getCapturedVar();
306       II = CapVar->getIdentifier();
307     } else if (I->capturesThis())
308       II = &Ctx.Idents.get("this");
309     else {
310       assert(I->capturesVariableArrayType());
311       II = &Ctx.Idents.get("vla");
312     }
313     if (ArgType->isVariablyModifiedType())
314       ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
315     auto *Arg =
316         ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
317                                   ArgType, ImplicitParamDecl::Other);
318     Args.emplace_back(Arg);
319     // Do not cast arguments if we emit function with non-original types.
320     TargetArgs.emplace_back(
321         FO.UIntPtrCastRequired
322             ? Arg
323             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
324     ++I;
325   }
326   Args.append(
327       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
328       CD->param_end());
329   TargetArgs.append(
330       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
331       CD->param_end());
332 
333   // Create the function declaration.
334   FunctionType::ExtInfo ExtInfo;
335   const CGFunctionInfo &FuncInfo =
336       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
337   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
338 
339   llvm::Function *F =
340       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
341                              FO.FunctionName, &CGM.getModule());
342   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
343   if (CD->isNothrow())
344     F->setDoesNotThrow();
345 
346   // Generate the function.
347   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
348                     FO.S->getLocStart(), CD->getBody()->getLocStart());
349   unsigned Cnt = CD->getContextParamPosition();
350   I = FO.S->captures().begin();
351   for (auto *FD : RD->fields()) {
352     // Do not map arguments if we emit function with non-original types.
353     Address LocalAddr(Address::invalid());
354     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
355       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
356                                                              TargetArgs[Cnt]);
357     } else {
358       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
359     }
360     // If we are capturing a pointer by copy we don't need to do anything, just
361     // use the value that we get from the arguments.
362     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
363       const VarDecl *CurVD = I->getCapturedVar();
364       // If the variable is a reference we need to materialize it here.
365       if (CurVD->getType()->isReferenceType()) {
366         Address RefAddr = CGF.CreateMemTemp(
367             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
368         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
369                               /*Volatile=*/false, CurVD->getType());
370         LocalAddr = RefAddr;
371       }
372       if (!FO.RegisterCastedArgsOnly)
373         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
374       ++Cnt;
375       ++I;
376       continue;
377     }
378 
379     LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
380     LValue ArgLVal =
381         CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo);
382     if (FD->hasCapturedVLAType()) {
383       if (FO.UIntPtrCastRequired) {
384         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
385                                                           Args[Cnt]->getName(),
386                                                           ArgLVal),
387                                      FD->getType(), BaseInfo);
388       }
389       auto *ExprArg =
390           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
391       auto VAT = FD->getCapturedVLAType();
392       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
393     } else if (I->capturesVariable()) {
394       auto *Var = I->getCapturedVar();
395       QualType VarTy = Var->getType();
396       Address ArgAddr = ArgLVal.getAddress();
397       if (!VarTy->isReferenceType()) {
398         if (ArgLVal.getType()->isLValueReferenceType()) {
399           ArgAddr = CGF.EmitLoadOfReference(
400               ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
401         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
402           assert(ArgLVal.getType()->isPointerType());
403           ArgAddr = CGF.EmitLoadOfPointer(
404               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
405         }
406       }
407       if (!FO.RegisterCastedArgsOnly) {
408         LocalAddrs.insert(
409             {Args[Cnt],
410              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
411       }
412     } else if (I->capturesVariableByCopy()) {
413       assert(!FD->getType()->isAnyPointerType() &&
414              "Not expecting a captured pointer.");
415       auto *Var = I->getCapturedVar();
416       QualType VarTy = Var->getType();
417       LocalAddrs.insert(
418           {Args[Cnt],
419            {Var,
420             FO.UIntPtrCastRequired
421                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
422                                        ArgLVal, VarTy->isReferenceType())
423                 : ArgLVal.getAddress()}});
424     } else {
425       // If 'this' is captured, load it into CXXThisValue.
426       assert(I->capturesThis());
427       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
428                          .getScalarVal();
429       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
430     }
431     ++Cnt;
432     ++I;
433   }
434 
435   return F;
436 }
437 
438 llvm::Function *
439 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
440   assert(
441       CapturedStmtInfo &&
442       "CapturedStmtInfo should be set when generating the captured function");
443   const CapturedDecl *CD = S.getCapturedDecl();
444   // Build the argument list.
445   bool NeedWrapperFunction =
446       getDebugInfo() &&
447       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
448   FunctionArgList Args;
449   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
450   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
451   SmallString<256> Buffer;
452   llvm::raw_svector_ostream Out(Buffer);
453   Out << CapturedStmtInfo->getHelperName();
454   if (NeedWrapperFunction)
455     Out << "_debug__";
456   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
457                      Out.str());
458   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
459                                                    VLASizes, CXXThisValue, FO);
460   for (const auto &LocalAddrPair : LocalAddrs) {
461     if (LocalAddrPair.second.first) {
462       setAddrOfLocalVar(LocalAddrPair.second.first,
463                         LocalAddrPair.second.second);
464     }
465   }
466   for (const auto &VLASizePair : VLASizes)
467     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
468   PGO.assignRegionCounters(GlobalDecl(CD), F);
469   CapturedStmtInfo->EmitBody(*this, CD->getBody());
470   FinishFunction(CD->getBodyRBrace());
471   if (!NeedWrapperFunction)
472     return F;
473 
474   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
475                             /*RegisterCastedArgsOnly=*/true,
476                             CapturedStmtInfo->getHelperName());
477   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
478   Args.clear();
479   LocalAddrs.clear();
480   VLASizes.clear();
481   llvm::Function *WrapperF =
482       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
483                                    WrapperCGF.CXXThisValue, WrapperFO);
484   LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
485   llvm::SmallVector<llvm::Value *, 4> CallArgs;
486   for (const auto *Arg : Args) {
487     llvm::Value *CallArg;
488     auto I = LocalAddrs.find(Arg);
489     if (I != LocalAddrs.end()) {
490       LValue LV =
491           WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
492       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
493     } else {
494       auto EI = VLASizes.find(Arg);
495       if (EI != VLASizes.end())
496         CallArg = EI->second.second;
497       else {
498         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
499                                               Arg->getType(), BaseInfo);
500         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
501       }
502     }
503     CallArgs.emplace_back(CallArg);
504   }
505   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
506                                                   F, CallArgs);
507   WrapperCGF.FinishFunction();
508   return WrapperF;
509 }
510 
511 //===----------------------------------------------------------------------===//
512 //                              OpenMP Directive Emission
513 //===----------------------------------------------------------------------===//
514 void CodeGenFunction::EmitOMPAggregateAssign(
515     Address DestAddr, Address SrcAddr, QualType OriginalType,
516     const llvm::function_ref<void(Address, Address)> &CopyGen) {
517   // Perform element-by-element initialization.
518   QualType ElementTy;
519 
520   // Drill down to the base element type on both arrays.
521   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
522   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
523   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
524 
525   auto SrcBegin = SrcAddr.getPointer();
526   auto DestBegin = DestAddr.getPointer();
527   // Cast from pointer to array type to pointer to single element.
528   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
529   // The basic structure here is a while-do loop.
530   auto BodyBB = createBasicBlock("omp.arraycpy.body");
531   auto DoneBB = createBasicBlock("omp.arraycpy.done");
532   auto IsEmpty =
533       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
534   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
535 
536   // Enter the loop body, making that address the current address.
537   auto EntryBB = Builder.GetInsertBlock();
538   EmitBlock(BodyBB);
539 
540   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
541 
542   llvm::PHINode *SrcElementPHI =
543     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
544   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
545   Address SrcElementCurrent =
546       Address(SrcElementPHI,
547               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
548 
549   llvm::PHINode *DestElementPHI =
550     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
551   DestElementPHI->addIncoming(DestBegin, EntryBB);
552   Address DestElementCurrent =
553     Address(DestElementPHI,
554             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
555 
556   // Emit copy.
557   CopyGen(DestElementCurrent, SrcElementCurrent);
558 
559   // Shift the address forward by one element.
560   auto DestElementNext = Builder.CreateConstGEP1_32(
561       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
562   auto SrcElementNext = Builder.CreateConstGEP1_32(
563       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
564   // Check whether we've reached the end.
565   auto Done =
566       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
567   Builder.CreateCondBr(Done, DoneBB, BodyBB);
568   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
569   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
570 
571   // Done.
572   EmitBlock(DoneBB, /*IsFinished=*/true);
573 }
574 
575 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
576                                   Address SrcAddr, const VarDecl *DestVD,
577                                   const VarDecl *SrcVD, const Expr *Copy) {
578   if (OriginalType->isArrayType()) {
579     auto *BO = dyn_cast<BinaryOperator>(Copy);
580     if (BO && BO->getOpcode() == BO_Assign) {
581       // Perform simple memcpy for simple copying.
582       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
583     } else {
584       // For arrays with complex element types perform element by element
585       // copying.
586       EmitOMPAggregateAssign(
587           DestAddr, SrcAddr, OriginalType,
588           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
589             // Working with the single array element, so have to remap
590             // destination and source variables to corresponding array
591             // elements.
592             CodeGenFunction::OMPPrivateScope Remap(*this);
593             Remap.addPrivate(DestVD, [DestElement]() -> Address {
594               return DestElement;
595             });
596             Remap.addPrivate(
597                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
598             (void)Remap.Privatize();
599             EmitIgnoredExpr(Copy);
600           });
601     }
602   } else {
603     // Remap pseudo source variable to private copy.
604     CodeGenFunction::OMPPrivateScope Remap(*this);
605     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
606     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
607     (void)Remap.Privatize();
608     // Emit copying of the whole variable.
609     EmitIgnoredExpr(Copy);
610   }
611 }
612 
613 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
614                                                 OMPPrivateScope &PrivateScope) {
615   if (!HaveInsertPoint())
616     return false;
617   bool FirstprivateIsLastprivate = false;
618   llvm::DenseSet<const VarDecl *> Lastprivates;
619   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
620     for (const auto *D : C->varlists())
621       Lastprivates.insert(
622           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
623   }
624   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
625   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
626   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
627     auto IRef = C->varlist_begin();
628     auto InitsRef = C->inits().begin();
629     for (auto IInit : C->private_copies()) {
630       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
631       bool ThisFirstprivateIsLastprivate =
632           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
633       auto *CapFD = CapturesInfo.lookup(OrigVD);
634       auto *FD = CapturedStmtInfo->lookup(OrigVD);
635       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
636           !FD->getType()->isReferenceType()) {
637         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
638         ++IRef;
639         ++InitsRef;
640         continue;
641       }
642       FirstprivateIsLastprivate =
643           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
644       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
645         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
646         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
647         bool IsRegistered;
648         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
649                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
650                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
651         Address OriginalAddr = EmitLValue(&DRE).getAddress();
652         QualType Type = VD->getType();
653         if (Type->isArrayType()) {
654           // Emit VarDecl with copy init for arrays.
655           // Get the address of the original variable captured in current
656           // captured region.
657           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
658             auto Emission = EmitAutoVarAlloca(*VD);
659             auto *Init = VD->getInit();
660             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
661               // Perform simple memcpy.
662               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
663                                   Type);
664             } else {
665               EmitOMPAggregateAssign(
666                   Emission.getAllocatedAddress(), OriginalAddr, Type,
667                   [this, VDInit, Init](Address DestElement,
668                                        Address SrcElement) {
669                     // Clean up any temporaries needed by the initialization.
670                     RunCleanupsScope InitScope(*this);
671                     // Emit initialization for single element.
672                     setAddrOfLocalVar(VDInit, SrcElement);
673                     EmitAnyExprToMem(Init, DestElement,
674                                      Init->getType().getQualifiers(),
675                                      /*IsInitializer*/ false);
676                     LocalDeclMap.erase(VDInit);
677                   });
678             }
679             EmitAutoVarCleanups(Emission);
680             return Emission.getAllocatedAddress();
681           });
682         } else {
683           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
684             // Emit private VarDecl with copy init.
685             // Remap temp VDInit variable to the address of the original
686             // variable
687             // (for proper handling of captured global variables).
688             setAddrOfLocalVar(VDInit, OriginalAddr);
689             EmitDecl(*VD);
690             LocalDeclMap.erase(VDInit);
691             return GetAddrOfLocalVar(VD);
692           });
693         }
694         assert(IsRegistered &&
695                "firstprivate var already registered as private");
696         // Silence the warning about unused variable.
697         (void)IsRegistered;
698       }
699       ++IRef;
700       ++InitsRef;
701     }
702   }
703   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
704 }
705 
706 void CodeGenFunction::EmitOMPPrivateClause(
707     const OMPExecutableDirective &D,
708     CodeGenFunction::OMPPrivateScope &PrivateScope) {
709   if (!HaveInsertPoint())
710     return;
711   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
712   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
713     auto IRef = C->varlist_begin();
714     for (auto IInit : C->private_copies()) {
715       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
716       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
717         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
718         bool IsRegistered =
719             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
720               // Emit private VarDecl with copy init.
721               EmitDecl(*VD);
722               return GetAddrOfLocalVar(VD);
723             });
724         assert(IsRegistered && "private var already registered as private");
725         // Silence the warning about unused variable.
726         (void)IsRegistered;
727       }
728       ++IRef;
729     }
730   }
731 }
732 
/// Emit the copies from the master's instances into the current thread's
/// instances of the threadprivate variables named in the 'copyin' clauses of
/// \p D.
///
/// All copies are placed in a single region guarded by a runtime comparison of
/// the master address and the current thread's address of the first copied
/// variable; the region is skipped when the two addresses are equal.
///
/// \returns true if the guarded copy region was emitted (i.e. at least one
/// variable was copied), false otherwise. The caller is presumably expected to
/// emit the barrier sketched in the comment below when true is returned.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  // Canonical declarations already copied; repeated mentions are skipped.
  llvm::DenseSet<const VarDecl *> CopiedVars;
  // Blocks of the guarded copy region; created lazily on the first copy.
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    // The helper expression lists are walked in lockstep with the variables.
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          // NOTE(review): presumably drops a mapping for VD left behind by the
          // lookup above, so that the lookup of *IRef below resolves to the
          // current thread's own instance - confirm.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
            Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                        : CGM.GetAddrOfGlobal(VD),
                    getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        // The set holds exactly one element only while the first variable is
        // being processed, so the guard is emitted once.
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Copy master -> current thread using the clause's assignment helper.
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
798 
799 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
800     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
801   if (!HaveInsertPoint())
802     return false;
803   bool HasAtLeastOneLastprivate = false;
804   llvm::DenseSet<const VarDecl *> SIMDLCVs;
805   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
806     auto *LoopDirective = cast<OMPLoopDirective>(&D);
807     for (auto *C : LoopDirective->counters()) {
808       SIMDLCVs.insert(
809           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
810     }
811   }
812   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
813   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
814     HasAtLeastOneLastprivate = true;
815     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
816       break;
817     auto IRef = C->varlist_begin();
818     auto IDestRef = C->destination_exprs().begin();
819     for (auto *IInit : C->private_copies()) {
820       // Keep the address of the original variable for future update at the end
821       // of the loop.
822       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
823       // Taskloops do not require additional initialization, it is done in
824       // runtime support library.
825       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
826         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
827         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
828           DeclRefExpr DRE(
829               const_cast<VarDecl *>(OrigVD),
830               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
831                   OrigVD) != nullptr,
832               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
833           return EmitLValue(&DRE).getAddress();
834         });
835         // Check if the variable is also a firstprivate: in this case IInit is
836         // not generated. Initialization of this variable will happen in codegen
837         // for 'firstprivate' clause.
838         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
839           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
840           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
841             // Emit private VarDecl with copy init.
842             EmitDecl(*VD);
843             return GetAddrOfLocalVar(VD);
844           });
845           assert(IsRegistered &&
846                  "lastprivate var already registered as private");
847           (void)IsRegistered;
848         }
849       }
850       ++IRef;
851       ++IDestRef;
852     }
853   }
854   return HasAtLeastOneLastprivate;
855 }
856 
857 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
858     const OMPExecutableDirective &D, bool NoFinals,
859     llvm::Value *IsLastIterCond) {
860   if (!HaveInsertPoint())
861     return;
862   // Emit following code:
863   // if (<IsLastIterCond>) {
864   //   orig_var1 = private_orig_var1;
865   //   ...
866   //   orig_varn = private_orig_varn;
867   // }
868   llvm::BasicBlock *ThenBB = nullptr;
869   llvm::BasicBlock *DoneBB = nullptr;
870   if (IsLastIterCond) {
871     ThenBB = createBasicBlock(".omp.lastprivate.then");
872     DoneBB = createBasicBlock(".omp.lastprivate.done");
873     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
874     EmitBlock(ThenBB);
875   }
876   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
877   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
878   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
879     auto IC = LoopDirective->counters().begin();
880     for (auto F : LoopDirective->finals()) {
881       auto *D =
882           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
883       if (NoFinals)
884         AlreadyEmittedVars.insert(D);
885       else
886         LoopCountersAndUpdates[D] = F;
887       ++IC;
888     }
889   }
890   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
891     auto IRef = C->varlist_begin();
892     auto ISrcRef = C->source_exprs().begin();
893     auto IDestRef = C->destination_exprs().begin();
894     for (auto *AssignOp : C->assignment_ops()) {
895       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
896       QualType Type = PrivateVD->getType();
897       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
898       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
899         // If lastprivate variable is a loop control variable for loop-based
900         // directive, update its value before copyin back to original
901         // variable.
902         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
903           EmitIgnoredExpr(FinalExpr);
904         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
905         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
906         // Get the address of the original variable.
907         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
908         // Get the address of the private variable.
909         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
910         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
911           PrivateAddr =
912               Address(Builder.CreateLoad(PrivateAddr),
913                       getNaturalTypeAlignment(RefTy->getPointeeType()));
914         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
915       }
916       ++IRef;
917       ++ISrcRef;
918       ++IDestRef;
919     }
920     if (auto *PostUpdate = C->getPostUpdateExpr())
921       EmitIgnoredExpr(PostUpdate);
922   }
923   if (IsLastIterCond)
924     EmitBlock(DoneBB, /*IsFinished=*/true);
925 }
926 
/// Emit and initialize the private copies for the 'reduction' clauses of \p D
/// and register them, together with the clauses' LHS/RHS helper variables, in
/// \p PrivateScope.
///
/// For every reduction item the private copy is registered for the item's base
/// declaration, the LHS helper is bound to the address of the shared (original)
/// item and the RHS helper is bound to the address of the private copy.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Flatten the per-clause expression lists into parallel vectors; index N in
  // each vector describes the same reduction item.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  // Index of the current reduction item inside RedCG.
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    // The callback runs the private variable's own initializer; how and when
    // it is invoked is decided by ReductionCodeGen::emitInitialization.
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    // Register the (adjusted) private address for the item's base declaration.
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    if (isa<OMPArraySectionExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      // The RHS implicit variable refers to the private copy as is.
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if (isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      // The RHS implicit variable refers to the private copy, viewed with the
      // memory type of the RHS helper variable.
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        // For arrays both helpers are rebased to the memory type of the
        // respective helper variable.
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1029 
1030 void CodeGenFunction::EmitOMPReductionClauseFinal(
1031     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1032   if (!HaveInsertPoint())
1033     return;
1034   llvm::SmallVector<const Expr *, 8> Privates;
1035   llvm::SmallVector<const Expr *, 8> LHSExprs;
1036   llvm::SmallVector<const Expr *, 8> RHSExprs;
1037   llvm::SmallVector<const Expr *, 8> ReductionOps;
1038   bool HasAtLeastOneReduction = false;
1039   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1040     HasAtLeastOneReduction = true;
1041     Privates.append(C->privates().begin(), C->privates().end());
1042     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1043     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1044     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1045   }
1046   if (HasAtLeastOneReduction) {
1047     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1048                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1049                       D.getDirectiveKind() == OMPD_simd;
1050     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1051     // Emit nowait reduction if nowait clause is present or directive is a
1052     // parallel directive (it always has implicit barrier).
1053     CGM.getOpenMPRuntime().emitReduction(
1054         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1055         {WithNowait, SimpleReduction, ReductionKind});
1056   }
1057 }
1058 
1059 static void emitPostUpdateForReductionClause(
1060     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1061     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1062   if (!CGF.HaveInsertPoint())
1063     return;
1064   llvm::BasicBlock *DoneBB = nullptr;
1065   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1066     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1067       if (!DoneBB) {
1068         if (auto *Cond = CondGen(CGF)) {
1069           // If the first post-update expression is found, emit conditional
1070           // block if it was requested.
1071           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1072           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1073           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1074           CGF.EmitBlock(ThenBB);
1075         }
1076       }
1077       CGF.EmitIgnoredExpr(PostUpdate);
1078     }
1079   }
1080   if (DoneBB)
1081     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1082 }
1083 
1084 namespace {
1085 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1086 /// parallel function. This is necessary for combined constructs such as
1087 /// 'distribute parallel for'
1088 typedef llvm::function_ref<void(CodeGenFunction &,
1089                                 const OMPExecutableDirective &,
1090                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1091     CodeGenBoundParametersTy;
1092 } // anonymous namespace
1093 
1094 static void emitCommonOMPParallelDirective(
1095     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1096     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1097     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1098   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1099   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1100       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1101   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1102     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1103     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1104                                          /*IgnoreResultAssign*/ true);
1105     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1106         CGF, NumThreads, NumThreadsClause->getLocStart());
1107   }
1108   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1109     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1110     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1111         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1112   }
1113   const Expr *IfCond = nullptr;
1114   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1115     if (C->getNameModifier() == OMPD_unknown ||
1116         C->getNameModifier() == OMPD_parallel) {
1117       IfCond = C->getCondition();
1118       break;
1119     }
1120   }
1121 
1122   OMPParallelScope Scope(CGF, S);
1123   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1124   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1125   // lower and upper bounds with the pragma 'for' chunking mechanism.
1126   // The following lambda takes care of appending the lower and upper bound
1127   // parameters when necessary
1128   CodeGenBoundParameters(CGF, S, CapturedVars);
1129   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1130   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1131                                               CapturedVars, IfCond);
1132 }
1133 
/// Bound-parameter callback that appends nothing. It matches the signature of
/// CodeGenBoundParametersTy and is passed to emitCommonOMPParallelDirective by
/// EmitOMPParallelDirective, which has no extra bound arguments to add.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1137 
1138 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1139   // Emit parallel region as a standalone region.
1140   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1141     OMPPrivateScope PrivateScope(CGF);
1142     bool Copyins = CGF.EmitOMPCopyinClause(S);
1143     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1144     if (Copyins) {
1145       // Emit implicit barrier to synchronize threads and avoid data races on
1146       // propagation master's thread values of threadprivate variables to local
1147       // instances of that variables of all other implicit threads.
1148       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1149           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1150           /*ForceSimpleCall=*/true);
1151     }
1152     CGF.EmitOMPPrivateClause(S, PrivateScope);
1153     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1154     (void)PrivateScope.Privatize();
1155     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1156     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1157   };
1158   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1159                                  emitEmptyBoundParameters);
1160   emitPostUpdateForReductionClause(
1161       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1162 }
1163 
1164 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1165                                       JumpDest LoopExit) {
1166   RunCleanupsScope BodyScope(*this);
1167   // Update counters values on current iteration.
1168   for (auto I : D.updates()) {
1169     EmitIgnoredExpr(I);
1170   }
1171   // Update the linear variables.
1172   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1173     for (auto *U : C->updates())
1174       EmitIgnoredExpr(U);
1175   }
1176 
1177   // On a continue in the body, jump to the end.
1178   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1179   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1180   // Emit loop body.
1181   EmitStmt(D.getBody());
1182   // The end (updates/cleanups).
1183   EmitBlock(Continue.getBlock());
1184   BreakContinueStack.pop_back();
1185 }
1186 
/// Emit the control flow of an inner loop: a condition block, the body, an
/// increment block with a back-edge to the condition, and the exit block.
///
/// \param S Statement whose source range provides the locations pushed onto
/// the loop stack and whose profile counter is used for the loop.
/// \param RequiresCleanup If true, the loop is left through a dedicated block
/// that branches to the exit through the pending cleanups.
/// \param LoopCond Condition tested before every iteration.
/// \param IncExpr Expression emitted in the increment block.
/// \param BodyGen Callback that emits the loop body.
/// \param PostIncGen Callback invoked right after \p IncExpr was emitted.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  // A separate staging block was created above: route it to the real exit
  // through the cleanups.
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  // While the body is emitted, 'break' targets the loop exit and 'continue'
  // targets the increment block.
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1235 
1236 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1237   if (!HaveInsertPoint())
1238     return false;
1239   // Emit inits for the linear variables.
1240   bool HasLinears = false;
1241   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1242     for (auto *Init : C->inits()) {
1243       HasLinears = true;
1244       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1245       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1246         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1247         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1248         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1249                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1250                         VD->getInit()->getType(), VK_LValue,
1251                         VD->getInit()->getExprLoc());
1252         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1253                                                 VD->getType()),
1254                        /*capturedByInit=*/false);
1255         EmitAutoVarCleanups(Emission);
1256       } else
1257         EmitVarDecl(*VD);
1258     }
1259     // Emit the linear steps for the linear clauses.
1260     // If a step is not constant, it is pre-calculated before the loop.
1261     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1262       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1263         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1264         // Emit calculation of the linear step.
1265         EmitIgnoredExpr(CS);
1266       }
1267   }
1268   return HasLinears;
1269 }
1270 
1271 void CodeGenFunction::EmitOMPLinearClauseFinal(
1272     const OMPLoopDirective &D,
1273     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1274   if (!HaveInsertPoint())
1275     return;
1276   llvm::BasicBlock *DoneBB = nullptr;
1277   // Emit the final values of the linear variables.
1278   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1279     auto IC = C->varlist_begin();
1280     for (auto *F : C->finals()) {
1281       if (!DoneBB) {
1282         if (auto *Cond = CondGen(*this)) {
1283           // If the first post-update expression is found, emit conditional
1284           // block if it was requested.
1285           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1286           DoneBB = createBasicBlock(".omp.linear.pu.done");
1287           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1288           EmitBlock(ThenBB);
1289         }
1290       }
1291       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1292       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1293                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1294                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1295       Address OrigAddr = EmitLValue(&DRE).getAddress();
1296       CodeGenFunction::OMPPrivateScope VarScope(*this);
1297       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1298       (void)VarScope.Privatize();
1299       EmitIgnoredExpr(F);
1300       ++IC;
1301     }
1302     if (auto *PostUpdate = C->getPostUpdateExpr())
1303       EmitIgnoredExpr(PostUpdate);
1304   }
1305   if (DoneBB)
1306     EmitBlock(DoneBB, /*IsFinished=*/true);
1307 }
1308 
1309 static void emitAlignedClause(CodeGenFunction &CGF,
1310                               const OMPExecutableDirective &D) {
1311   if (!CGF.HaveInsertPoint())
1312     return;
1313   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1314     unsigned ClauseAlignment = 0;
1315     if (auto AlignmentExpr = Clause->getAlignment()) {
1316       auto AlignmentCI =
1317           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1318       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1319     }
1320     for (auto E : Clause->varlists()) {
1321       unsigned Alignment = ClauseAlignment;
1322       if (Alignment == 0) {
1323         // OpenMP [2.8.1, Description]
1324         // If no optional parameter is specified, implementation-defined default
1325         // alignments for SIMD instructions on the target platforms are assumed.
1326         Alignment =
1327             CGF.getContext()
1328                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1329                     E->getType()->getPointeeType()))
1330                 .getQuantity();
1331       }
1332       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1333              "alignment is not power of 2");
1334       if (Alignment != 0) {
1335         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1336         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1337       }
1338     }
1339   }
1340 }
1341 
1342 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1343     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1344   if (!HaveInsertPoint())
1345     return;
1346   auto I = S.private_counters().begin();
1347   for (auto *E : S.counters()) {
1348     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1349     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1350     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1351       // Emit var without initialization.
1352       if (!LocalDeclMap.count(PrivateVD)) {
1353         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1354         EmitAutoVarCleanups(VarEmission);
1355       }
1356       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1357                       /*RefersToEnclosingVariableOrCapture=*/false,
1358                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1359       return EmitLValue(&DRE).getAddress();
1360     });
1361     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1362         VD->hasGlobalStorage()) {
1363       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1364         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1365                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1366                         E->getType(), VK_LValue, E->getExprLoc());
1367         return EmitLValue(&DRE).getAddress();
1368       });
1369     }
1370     ++I;
1371   }
1372 }
1373 
1374 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1375                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1376                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1377   if (!CGF.HaveInsertPoint())
1378     return;
1379   {
1380     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1381     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1382     (void)PreCondScope.Privatize();
1383     // Get initial values of real counters.
1384     for (auto I : S.inits()) {
1385       CGF.EmitIgnoredExpr(I);
1386     }
1387   }
1388   // Check that loop is executed at least one time.
1389   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1390 }
1391 
1392 void CodeGenFunction::EmitOMPLinearClause(
1393     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1394   if (!HaveInsertPoint())
1395     return;
1396   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1397   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1398     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1399     for (auto *C : LoopDirective->counters()) {
1400       SIMDLCVs.insert(
1401           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1402     }
1403   }
1404   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1405     auto CurPrivate = C->privates().begin();
1406     for (auto *E : C->varlists()) {
1407       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1408       auto *PrivateVD =
1409           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1410       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1411         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1412           // Emit private VarDecl with copy init.
1413           EmitVarDecl(*PrivateVD);
1414           return GetAddrOfLocalVar(PrivateVD);
1415         });
1416         assert(IsRegistered && "linear var already registered as private");
1417         // Silence the warning about unused variable.
1418         (void)IsRegistered;
1419       } else
1420         EmitVarDecl(*PrivateVD);
1421       ++CurPrivate;
1422     }
1423   }
1424 }
1425 
1426 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1427                                      const OMPExecutableDirective &D,
1428                                      bool IsMonotonic) {
1429   if (!CGF.HaveInsertPoint())
1430     return;
1431   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1432     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1433                                  /*ignoreResult=*/true);
1434     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1435     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1436     // In presence of finite 'safelen', it may be unsafe to mark all
1437     // the memory instructions parallel, because loop-carried
1438     // dependences of 'safelen' iterations are possible.
1439     if (!IsMonotonic)
1440       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1441   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1442     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1443                                  /*ignoreResult=*/true);
1444     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1445     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1446     // In presence of finite 'safelen', it may be unsafe to mark all
1447     // the memory instructions parallel, because loop-carried
1448     // dependences of 'safelen' iterations are possible.
1449     CGF.LoopStack.setParallel(false);
1450   }
1451 }
1452 
1453 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1454                                       bool IsMonotonic) {
1455   // Walk clauses and process safelen/lastprivate.
1456   LoopStack.setParallel(!IsMonotonic);
1457   LoopStack.setVectorizeEnable(true);
1458   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1459 }
1460 
1461 void CodeGenFunction::EmitOMPSimdFinal(
1462     const OMPLoopDirective &D,
1463     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1464   if (!HaveInsertPoint())
1465     return;
1466   llvm::BasicBlock *DoneBB = nullptr;
1467   auto IC = D.counters().begin();
1468   auto IPC = D.private_counters().begin();
1469   for (auto F : D.finals()) {
1470     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1471     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1472     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1473     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1474         OrigVD->hasGlobalStorage() || CED) {
1475       if (!DoneBB) {
1476         if (auto *Cond = CondGen(*this)) {
1477           // If the first post-update expression is found, emit conditional
1478           // block if it was requested.
1479           auto *ThenBB = createBasicBlock(".omp.final.then");
1480           DoneBB = createBasicBlock(".omp.final.done");
1481           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1482           EmitBlock(ThenBB);
1483         }
1484       }
1485       Address OrigAddr = Address::invalid();
1486       if (CED)
1487         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1488       else {
1489         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1490                         /*RefersToEnclosingVariableOrCapture=*/false,
1491                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1492         OrigAddr = EmitLValue(&DRE).getAddress();
1493       }
1494       OMPPrivateScope VarScope(*this);
1495       VarScope.addPrivate(OrigVD,
1496                           [OrigAddr]() -> Address { return OrigAddr; });
1497       (void)VarScope.Privatize();
1498       EmitIgnoredExpr(F);
1499     }
1500     ++IC;
1501     ++IPC;
1502   }
1503   if (DoneBB)
1504     EmitBlock(DoneBB, /*IsFinished=*/true);
1505 }
1506 
1507 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1508                                          const OMPLoopDirective &S,
1509                                          CodeGenFunction::JumpDest LoopExit) {
1510   CGF.EmitOMPLoopBody(S, LoopExit);
1511   CGF.EmitStopPoint(&S);
1512 }
1513 
1514 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1515   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1516     OMPLoopScope PreInitScope(CGF, S);
1517     // if (PreCond) {
1518     //   for (IV in 0..LastIteration) BODY;
1519     //   <Final counter/linear vars updates>;
1520     // }
1521     //
1522 
1523     // Emit: if (PreCond) - begin.
1524     // If the condition constant folds and can be elided, avoid emitting the
1525     // whole loop.
1526     bool CondConstant;
1527     llvm::BasicBlock *ContBlock = nullptr;
1528     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1529       if (!CondConstant)
1530         return;
1531     } else {
1532       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1533       ContBlock = CGF.createBasicBlock("simd.if.end");
1534       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1535                   CGF.getProfileCount(&S));
1536       CGF.EmitBlock(ThenBlock);
1537       CGF.incrementProfileCounter(&S);
1538     }
1539 
1540     // Emit the loop iteration variable.
1541     const Expr *IVExpr = S.getIterationVariable();
1542     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1543     CGF.EmitVarDecl(*IVDecl);
1544     CGF.EmitIgnoredExpr(S.getInit());
1545 
1546     // Emit the iterations count variable.
1547     // If it is not a variable, Sema decided to calculate iterations count on
1548     // each iteration (e.g., it is foldable into a constant).
1549     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1550       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1551       // Emit calculation of the iterations count.
1552       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1553     }
1554 
1555     CGF.EmitOMPSimdInit(S);
1556 
1557     emitAlignedClause(CGF, S);
1558     (void)CGF.EmitOMPLinearClauseInit(S);
1559     {
1560       OMPPrivateScope LoopScope(CGF);
1561       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1562       CGF.EmitOMPLinearClause(S, LoopScope);
1563       CGF.EmitOMPPrivateClause(S, LoopScope);
1564       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1565       bool HasLastprivateClause =
1566           CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1567       (void)LoopScope.Privatize();
1568       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1569                            S.getInc(),
1570                            [&S](CodeGenFunction &CGF) {
1571                              CGF.EmitOMPLoopBody(S, JumpDest());
1572                              CGF.EmitStopPoint(&S);
1573                            },
1574                            [](CodeGenFunction &) {});
1575       CGF.EmitOMPSimdFinal(
1576           S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1577       // Emit final copy of the lastprivate variables at the end of loops.
1578       if (HasLastprivateClause)
1579         CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1580       CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1581       emitPostUpdateForReductionClause(
1582           CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1583     }
1584     CGF.EmitOMPLinearClauseFinal(
1585         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1586     // Emit: if (PreCond) - end.
1587     if (ContBlock) {
1588       CGF.EmitBranch(ContBlock);
1589       CGF.EmitBlock(ContBlock, true);
1590     }
1591   };
1592   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1593   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1594 }
1595 
1596 void CodeGenFunction::EmitOMPOuterLoop(
1597     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1598     CodeGenFunction::OMPPrivateScope &LoopScope,
1599     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1600     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1601     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1602   auto &RT = CGM.getOpenMPRuntime();
1603 
1604   const Expr *IVExpr = S.getIterationVariable();
1605   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1606   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1607 
1608   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1609 
1610   // Start the loop with a block that tests the condition.
1611   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1612   EmitBlock(CondBlock);
1613   const SourceRange &R = S.getSourceRange();
1614   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1615                  SourceLocToDebugLoc(R.getEnd()));
1616 
1617   llvm::Value *BoolCondVal = nullptr;
1618   if (!DynamicOrOrdered) {
1619     // UB = min(UB, GlobalUB) or
1620     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1621     // 'distribute parallel for')
1622     EmitIgnoredExpr(LoopArgs.EUB);
1623     // IV = LB
1624     EmitIgnoredExpr(LoopArgs.Init);
1625     // IV < UB
1626     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1627   } else {
1628     BoolCondVal =
1629         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1630                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1631   }
1632 
1633   // If there are any cleanups between here and the loop-exit scope,
1634   // create a block to stage a loop exit along.
1635   auto ExitBlock = LoopExit.getBlock();
1636   if (LoopScope.requiresCleanups())
1637     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1638 
1639   auto LoopBody = createBasicBlock("omp.dispatch.body");
1640   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1641   if (ExitBlock != LoopExit.getBlock()) {
1642     EmitBlock(ExitBlock);
1643     EmitBranchThroughCleanup(LoopExit);
1644   }
1645   EmitBlock(LoopBody);
1646 
1647   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1648   // LB for loop condition and emitted it above).
1649   if (DynamicOrOrdered)
1650     EmitIgnoredExpr(LoopArgs.Init);
1651 
1652   // Create a block for the increment.
1653   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1654   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1655 
1656   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1657   // with dynamic/guided scheduling and without ordered clause.
1658   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1659     LoopStack.setParallel(!IsMonotonic);
1660   else
1661     EmitOMPSimdInit(S, IsMonotonic);
1662 
1663   SourceLocation Loc = S.getLocStart();
1664 
1665   // when 'distribute' is not combined with a 'for':
1666   // while (idx <= UB) { BODY; ++idx; }
1667   // when 'distribute' is combined with a 'for'
1668   // (e.g. 'distribute parallel for')
1669   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1670   EmitOMPInnerLoop(
1671       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1672       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1673         CodeGenLoop(CGF, S, LoopExit);
1674       },
1675       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1676         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1677       });
1678 
1679   EmitBlock(Continue.getBlock());
1680   BreakContinueStack.pop_back();
1681   if (!DynamicOrOrdered) {
1682     // Emit "LB = LB + Stride", "UB = UB + Stride".
1683     EmitIgnoredExpr(LoopArgs.NextLB);
1684     EmitIgnoredExpr(LoopArgs.NextUB);
1685   }
1686 
1687   EmitBranch(CondBlock);
1688   LoopStack.pop();
1689   // Emit the fall-through block.
1690   EmitBlock(LoopExit.getBlock());
1691 
1692   // Tell the runtime we are done.
1693   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1694     if (!DynamicOrOrdered)
1695       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1696                                                      S.getDirectiveKind());
1697   };
1698   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1699 }
1700 
1701 void CodeGenFunction::EmitOMPForOuterLoop(
1702     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1703     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1704     const OMPLoopArguments &LoopArgs,
1705     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1706   auto &RT = CGM.getOpenMPRuntime();
1707 
1708   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1709   const bool DynamicOrOrdered =
1710       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1711 
1712   assert((Ordered ||
1713           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1714                                  LoopArgs.Chunk != nullptr)) &&
1715          "static non-chunked schedule does not need outer loop");
1716 
1717   // Emit outer loop.
1718   //
1719   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1720   // When schedule(dynamic,chunk_size) is specified, the iterations are
1721   // distributed to threads in the team in chunks as the threads request them.
1722   // Each thread executes a chunk of iterations, then requests another chunk,
1723   // until no chunks remain to be distributed. Each chunk contains chunk_size
1724   // iterations, except for the last chunk to be distributed, which may have
1725   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1726   //
1727   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1728   // to threads in the team in chunks as the executing threads request them.
1729   // Each thread executes a chunk of iterations, then requests another chunk,
1730   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1731   // each chunk is proportional to the number of unassigned iterations divided
1732   // by the number of threads in the team, decreasing to 1. For a chunk_size
1733   // with value k (greater than 1), the size of each chunk is determined in the
1734   // same way, with the restriction that the chunks do not contain fewer than k
1735   // iterations (except for the last chunk to be assigned, which may have fewer
1736   // than k iterations).
1737   //
1738   // When schedule(auto) is specified, the decision regarding scheduling is
1739   // delegated to the compiler and/or runtime system. The programmer gives the
1740   // implementation the freedom to choose any possible mapping of iterations to
1741   // threads in the team.
1742   //
1743   // When schedule(runtime) is specified, the decision regarding scheduling is
1744   // deferred until run time, and the schedule and chunk size are taken from the
1745   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1746   // implementation defined
1747   //
1748   // while(__kmpc_dispatch_next(&LB, &UB)) {
1749   //   idx = LB;
1750   //   while (idx <= UB) { BODY; ++idx;
1751   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1752   //   } // inner loop
1753   // }
1754   //
1755   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1756   // When schedule(static, chunk_size) is specified, iterations are divided into
1757   // chunks of size chunk_size, and the chunks are assigned to the threads in
1758   // the team in a round-robin fashion in the order of the thread number.
1759   //
1760   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1761   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1762   //   LB = LB + ST;
1763   //   UB = UB + ST;
1764   // }
1765   //
1766 
1767   const Expr *IVExpr = S.getIterationVariable();
1768   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1769   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1770 
1771   if (DynamicOrOrdered) {
1772     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1773     llvm::Value *LBVal = DispatchBounds.first;
1774     llvm::Value *UBVal = DispatchBounds.second;
1775     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1776                                                              LoopArgs.Chunk};
1777     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1778                            IVSigned, Ordered, DipatchRTInputValues);
1779   } else {
1780     CGOpenMPRuntime::StaticRTInput StaticInit(
1781         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1782         LoopArgs.ST, LoopArgs.Chunk);
1783     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1784                          ScheduleKind, StaticInit);
1785   }
1786 
1787   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1788                                     const unsigned IVSize,
1789                                     const bool IVSigned) {
1790     if (Ordered) {
1791       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1792                                                             IVSigned);
1793     }
1794   };
1795 
1796   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1797                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1798   OuterLoopArgs.IncExpr = S.getInc();
1799   OuterLoopArgs.Init = S.getInit();
1800   OuterLoopArgs.Cond = S.getCond();
1801   OuterLoopArgs.NextLB = S.getNextLowerBound();
1802   OuterLoopArgs.NextUB = S.getNextUpperBound();
1803   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1804                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1805 }
1806 
1807 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1808                              const unsigned IVSize, const bool IVSigned) {}
1809 
1810 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1811     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1812     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1813     const CodeGenLoopTy &CodeGenLoopContent) {
1814 
1815   auto &RT = CGM.getOpenMPRuntime();
1816 
1817   // Emit outer loop.
1818   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1819   // dynamic
1820   //
1821 
1822   const Expr *IVExpr = S.getIterationVariable();
1823   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1824   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1825 
1826   CGOpenMPRuntime::StaticRTInput StaticInit(
1827       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1828       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1829   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1830 
1831   // for combined 'distribute' and 'for' the increment expression of distribute
1832   // is store in DistInc. For 'distribute' alone, it is in Inc.
1833   Expr *IncExpr;
1834   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1835     IncExpr = S.getDistInc();
1836   else
1837     IncExpr = S.getInc();
1838 
1839   // this routine is shared by 'omp distribute parallel for' and
1840   // 'omp distribute': select the right EUB expression depending on the
1841   // directive
1842   OMPLoopArguments OuterLoopArgs;
1843   OuterLoopArgs.LB = LoopArgs.LB;
1844   OuterLoopArgs.UB = LoopArgs.UB;
1845   OuterLoopArgs.ST = LoopArgs.ST;
1846   OuterLoopArgs.IL = LoopArgs.IL;
1847   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1848   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1849                           ? S.getCombinedEnsureUpperBound()
1850                           : S.getEnsureUpperBound();
1851   OuterLoopArgs.IncExpr = IncExpr;
1852   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1853                            ? S.getCombinedInit()
1854                            : S.getInit();
1855   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1856                            ? S.getCombinedCond()
1857                            : S.getCond();
1858   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1859                              ? S.getCombinedNextLowerBound()
1860                              : S.getNextLowerBound();
1861   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1862                              ? S.getCombinedNextUpperBound()
1863                              : S.getNextUpperBound();
1864 
1865   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1866                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1867                    emitEmptyOrdered);
1868 }
1869 
1870 /// Emit a helper variable and return corresponding lvalue.
1871 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1872                                const DeclRefExpr *Helper) {
1873   auto VDecl = cast<VarDecl>(Helper->getDecl());
1874   CGF.EmitVarDecl(*VDecl);
1875   return CGF.EmitLValue(Helper);
1876 }
1877 
1878 static std::pair<LValue, LValue>
1879 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1880                                      const OMPExecutableDirective &S) {
1881   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1882   LValue LB =
1883       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1884   LValue UB =
1885       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1886 
1887   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1888   // parallel for') we need to use the 'distribute'
1889   // chunk lower and upper bounds rather than the whole loop iteration
1890   // space. These are parameters to the outlined function for 'parallel'
1891   // and we copy the bounds of the previous schedule into the
1892   // the current ones.
1893   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1894   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1895   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1896   PrevLBVal = CGF.EmitScalarConversion(
1897       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1898       LS.getIterationVariable()->getType(), SourceLocation());
1899   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1900   PrevUBVal = CGF.EmitScalarConversion(
1901       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1902       LS.getIterationVariable()->getType(), SourceLocation());
1903 
1904   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1905   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1906 
1907   return {LB, UB};
1908 }
1909 
1910 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1911 /// we need to use the LB and UB expressions generated by the worksharing
1912 /// code generation support, whereas in non combined situations we would
1913 /// just emit 0 and the LastIteration expression
1914 /// This function is necessary due to the difference of the LB and UB
1915 /// types for the RT emission routines for 'for_static_init' and
1916 /// 'for_dispatch_init'
1917 static std::pair<llvm::Value *, llvm::Value *>
1918 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1919                                         const OMPExecutableDirective &S,
1920                                         Address LB, Address UB) {
1921   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1922   const Expr *IVExpr = LS.getIterationVariable();
1923   // when implementing a dynamic schedule for a 'for' combined with a
1924   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1925   // is not normalized as each team only executes its own assigned
1926   // distribute chunk
1927   QualType IteratorTy = IVExpr->getType();
1928   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1929                                             SourceLocation());
1930   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1931                                             SourceLocation());
1932   return {LBVal, UBVal};
1933 }
1934 
1935 static void emitDistributeParallelForDistributeInnerBoundParams(
1936     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1937     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1938   const auto &Dir = cast<OMPLoopDirective>(S);
1939   LValue LB =
1940       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1941   auto LBCast = CGF.Builder.CreateIntCast(
1942       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1943   CapturedVars.push_back(LBCast);
1944   LValue UB =
1945       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1946 
1947   auto UBCast = CGF.Builder.CreateIntCast(
1948       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1949   CapturedVars.push_back(UBCast);
1950 }
1951 
1952 static void
1953 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
1954                                  const OMPLoopDirective &S,
1955                                  CodeGenFunction::JumpDest LoopExit) {
1956   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
1957                                          PrePostActionTy &) {
1958     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
1959                                emitDistributeParallelForInnerBounds,
1960                                emitDistributeParallelForDispatchBounds);
1961   };
1962 
1963   emitCommonOMPParallelDirective(
1964       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
1965       emitDistributeParallelForDistributeInnerBoundParams);
1966 }
1967 
1968 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1969     const OMPDistributeParallelForDirective &S) {
1970   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1971     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
1972                               S.getDistInc());
1973   };
1974   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1975   OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
1976                                   /*HasCancel=*/false);
1977   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
1978                                               /*HasCancel=*/false);
1979 }
1980 
1981 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
1982     const OMPDistributeParallelForSimdDirective &S) {
1983   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1984   CGM.getOpenMPRuntime().emitInlinedDirective(
1985       *this, OMPD_distribute_parallel_for_simd,
1986       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1987         OMPLoopScope PreInitScope(CGF, S);
1988         CGF.EmitStmt(
1989             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1990       });
1991 }
1992 
1993 void CodeGenFunction::EmitOMPDistributeSimdDirective(
1994     const OMPDistributeSimdDirective &S) {
1995   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1996   CGM.getOpenMPRuntime().emitInlinedDirective(
1997       *this, OMPD_distribute_simd,
1998       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1999         OMPLoopScope PreInitScope(CGF, S);
2000         CGF.EmitStmt(
2001             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2002       });
2003 }
2004 
2005 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
2006     const OMPTargetParallelForSimdDirective &S) {
2007   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2008   CGM.getOpenMPRuntime().emitInlinedDirective(
2009       *this, OMPD_target_parallel_for_simd,
2010       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2011         OMPLoopScope PreInitScope(CGF, S);
2012         CGF.EmitStmt(
2013             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2014       });
2015 }
2016 
2017 void CodeGenFunction::EmitOMPTargetSimdDirective(
2018     const OMPTargetSimdDirective &S) {
2019   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2020   CGM.getOpenMPRuntime().emitInlinedDirective(
2021       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2022         OMPLoopScope PreInitScope(CGF, S);
2023         CGF.EmitStmt(
2024             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2025       });
2026 }
2027 
2028 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
2029     const OMPTeamsDistributeDirective &S) {
2030   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2031   CGM.getOpenMPRuntime().emitInlinedDirective(
2032       *this, OMPD_teams_distribute,
2033       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2034         OMPLoopScope PreInitScope(CGF, S);
2035         CGF.EmitStmt(
2036             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2037       });
2038 }
2039 
2040 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2041     const OMPTeamsDistributeSimdDirective &S) {
2042   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2043   CGM.getOpenMPRuntime().emitInlinedDirective(
2044       *this, OMPD_teams_distribute_simd,
2045       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2046         OMPLoopScope PreInitScope(CGF, S);
2047         CGF.EmitStmt(
2048             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2049       });
2050 }
2051 
2052 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2053     const OMPTeamsDistributeParallelForSimdDirective &S) {
2054   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2055   CGM.getOpenMPRuntime().emitInlinedDirective(
2056       *this, OMPD_teams_distribute_parallel_for_simd,
2057       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2058         OMPLoopScope PreInitScope(CGF, S);
2059         CGF.EmitStmt(
2060             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2061       });
2062 }
2063 
2064 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2065     const OMPTeamsDistributeParallelForDirective &S) {
2066   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2067   CGM.getOpenMPRuntime().emitInlinedDirective(
2068       *this, OMPD_teams_distribute_parallel_for,
2069       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2070         OMPLoopScope PreInitScope(CGF, S);
2071         CGF.EmitStmt(
2072             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2073       });
2074 }
2075 
2076 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2077     const OMPTargetTeamsDistributeDirective &S) {
2078   CGM.getOpenMPRuntime().emitInlinedDirective(
2079       *this, OMPD_target_teams_distribute,
2080       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2081         CGF.EmitStmt(
2082             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2083       });
2084 }
2085 
2086 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2087     const OMPTargetTeamsDistributeParallelForDirective &S) {
2088   CGM.getOpenMPRuntime().emitInlinedDirective(
2089       *this, OMPD_target_teams_distribute_parallel_for,
2090       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2091         CGF.EmitStmt(
2092             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2093       });
2094 }
2095 
2096 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2097     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2098   CGM.getOpenMPRuntime().emitInlinedDirective(
2099       *this, OMPD_target_teams_distribute_parallel_for_simd,
2100       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2101         CGF.EmitStmt(
2102             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2103       });
2104 }
2105 
2106 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2107     const OMPTargetTeamsDistributeSimdDirective &S) {
2108   CGM.getOpenMPRuntime().emitInlinedDirective(
2109       *this, OMPD_target_teams_distribute_simd,
2110       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2111         CGF.EmitStmt(
2112             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2113       });
2114 }
2115 
2116 namespace {
2117   struct ScheduleKindModifiersTy {
2118     OpenMPScheduleClauseKind Kind;
2119     OpenMPScheduleClauseModifier M1;
2120     OpenMPScheduleClauseModifier M2;
2121     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2122                             OpenMPScheduleClauseModifier M1,
2123                             OpenMPScheduleClauseModifier M2)
2124         : Kind(Kind), M1(M1), M2(M2) {}
2125   };
2126 } // namespace
2127 
2128 bool CodeGenFunction::EmitOMPWorksharingLoop(
2129     const OMPLoopDirective &S, Expr *EUB,
2130     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2131     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2132   // Emit the loop iteration variable.
2133   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2134   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2135   EmitVarDecl(*IVDecl);
2136 
2137   // Emit the iterations count variable.
2138   // If it is not a variable, Sema decided to calculate iterations count on each
2139   // iteration (e.g., it is foldable into a constant).
2140   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2141     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2142     // Emit calculation of the iterations count.
2143     EmitIgnoredExpr(S.getCalcLastIteration());
2144   }
2145 
2146   auto &RT = CGM.getOpenMPRuntime();
2147 
2148   bool HasLastprivateClause;
2149   // Check pre-condition.
2150   {
2151     OMPLoopScope PreInitScope(*this, S);
2152     // Skip the entire loop if we don't meet the precondition.
2153     // If the condition constant folds and can be elided, avoid emitting the
2154     // whole loop.
2155     bool CondConstant;
2156     llvm::BasicBlock *ContBlock = nullptr;
2157     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2158       if (!CondConstant)
2159         return false;
2160     } else {
2161       auto *ThenBlock = createBasicBlock("omp.precond.then");
2162       ContBlock = createBasicBlock("omp.precond.end");
2163       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2164                   getProfileCount(&S));
2165       EmitBlock(ThenBlock);
2166       incrementProfileCounter(&S);
2167     }
2168 
2169     bool Ordered = false;
2170     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2171       if (OrderedClause->getNumForLoops())
2172         RT.emitDoacrossInit(*this, S);
2173       else
2174         Ordered = true;
2175     }
2176 
2177     llvm::DenseSet<const Expr *> EmittedFinals;
2178     emitAlignedClause(*this, S);
2179     bool HasLinears = EmitOMPLinearClauseInit(S);
2180     // Emit helper vars inits.
2181 
2182     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2183     LValue LB = Bounds.first;
2184     LValue UB = Bounds.second;
2185     LValue ST =
2186         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2187     LValue IL =
2188         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2189 
2190     // Emit 'then' code.
2191     {
2192       OMPPrivateScope LoopScope(*this);
2193       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2194         // Emit implicit barrier to synchronize threads and avoid data races on
2195         // initialization of firstprivate variables and post-update of
2196         // lastprivate variables.
2197         CGM.getOpenMPRuntime().emitBarrierCall(
2198             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2199             /*ForceSimpleCall=*/true);
2200       }
2201       EmitOMPPrivateClause(S, LoopScope);
2202       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2203       EmitOMPReductionClauseInit(S, LoopScope);
2204       EmitOMPPrivateLoopCounters(S, LoopScope);
2205       EmitOMPLinearClause(S, LoopScope);
2206       (void)LoopScope.Privatize();
2207 
2208       // Detect the loop schedule kind and chunk.
2209       llvm::Value *Chunk = nullptr;
2210       OpenMPScheduleTy ScheduleKind;
2211       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2212         ScheduleKind.Schedule = C->getScheduleKind();
2213         ScheduleKind.M1 = C->getFirstScheduleModifier();
2214         ScheduleKind.M2 = C->getSecondScheduleModifier();
2215         if (const auto *Ch = C->getChunkSize()) {
2216           Chunk = EmitScalarExpr(Ch);
2217           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2218                                        S.getIterationVariable()->getType(),
2219                                        S.getLocStart());
2220         }
2221       }
2222       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2223       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2224       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2225       // If the static schedule kind is specified or if the ordered clause is
2226       // specified, and if no monotonic modifier is specified, the effect will
2227       // be as if the monotonic modifier was specified.
2228       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2229                                 /* Chunked */ Chunk != nullptr) &&
2230           !Ordered) {
2231         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2232           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2233         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2234         // When no chunk_size is specified, the iteration space is divided into
2235         // chunks that are approximately equal in size, and at most one chunk is
2236         // distributed to each thread. Note that the size of the chunks is
2237         // unspecified in this case.
2238         CGOpenMPRuntime::StaticRTInput StaticInit(
2239             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2240             UB.getAddress(), ST.getAddress());
2241         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2242                              ScheduleKind, StaticInit);
2243         auto LoopExit =
2244             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2245         // UB = min(UB, GlobalUB);
2246         EmitIgnoredExpr(S.getEnsureUpperBound());
2247         // IV = LB;
2248         EmitIgnoredExpr(S.getInit());
2249         // while (idx <= UB) { BODY; ++idx; }
2250         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2251                          S.getInc(),
2252                          [&S, LoopExit](CodeGenFunction &CGF) {
2253                            CGF.EmitOMPLoopBody(S, LoopExit);
2254                            CGF.EmitStopPoint(&S);
2255                          },
2256                          [](CodeGenFunction &) {});
2257         EmitBlock(LoopExit.getBlock());
2258         // Tell the runtime we are done.
2259         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2260           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2261                                                          S.getDirectiveKind());
2262         };
2263         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2264       } else {
2265         const bool IsMonotonic =
2266             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2267             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2268             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2269             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2270         // Emit the outer loop, which requests its work chunk [LB..UB] from
2271         // runtime and runs the inner loop to process it.
2272         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2273                                              ST.getAddress(), IL.getAddress(),
2274                                              Chunk, EUB);
2275         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2276                             LoopArguments, CGDispatchBounds);
2277       }
2278       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2279         EmitOMPSimdFinal(S,
2280                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2281                            return CGF.Builder.CreateIsNotNull(
2282                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2283                          });
2284       }
2285       EmitOMPReductionClauseFinal(
2286           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2287                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2288                  : /*Parallel only*/ OMPD_parallel);
2289       // Emit post-update of the reduction variables if IsLastIter != 0.
2290       emitPostUpdateForReductionClause(
2291           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2292             return CGF.Builder.CreateIsNotNull(
2293                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2294           });
2295       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2296       if (HasLastprivateClause)
2297         EmitOMPLastprivateClauseFinal(
2298             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2299             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2300     }
2301     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2302       return CGF.Builder.CreateIsNotNull(
2303           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2304     });
2305     // We're now done with the loop, so jump to the continuation block.
2306     if (ContBlock) {
2307       EmitBranch(ContBlock);
2308       EmitBlock(ContBlock, true);
2309     }
2310   }
2311   return HasLastprivateClause;
2312 }
2313 
2314 /// The following two functions generate expressions for the loop lower
2315 /// and upper bounds in case of static and dynamic (dispatch) schedule
2316 /// of the associated 'for' or 'distribute' loop.
2317 static std::pair<LValue, LValue>
2318 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2319   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2320   LValue LB =
2321       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2322   LValue UB =
2323       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2324   return {LB, UB};
2325 }
2326 
2327 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2328 /// consider the lower and upper bound expressions generated by the
2329 /// worksharing loop support, but we use 0 and the iteration space size as
2330 /// constants
2331 static std::pair<llvm::Value *, llvm::Value *>
2332 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2333                           Address LB, Address UB) {
2334   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2335   const Expr *IVExpr = LS.getIterationVariable();
2336   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2337   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2338   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2339   return {LBVal, UBVal};
2340 }
2341 
2342 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2343   bool HasLastprivates = false;
2344   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2345                                           PrePostActionTy &) {
2346     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2347     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2348                                                  emitForLoopBounds,
2349                                                  emitDispatchForLoopBounds);
2350   };
2351   {
2352     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2353     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2354                                                 S.hasCancel());
2355   }
2356 
2357   // Emit an implicit barrier at the end.
2358   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2359     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2360   }
2361 }
2362 
2363 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2364   bool HasLastprivates = false;
2365   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2366                                           PrePostActionTy &) {
2367     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2368                                                  emitForLoopBounds,
2369                                                  emitDispatchForLoopBounds);
2370   };
2371   {
2372     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2373     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2374   }
2375 
2376   // Emit an implicit barrier at the end.
2377   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2378     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2379   }
2380 }
2381 
2382 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2383                                 const Twine &Name,
2384                                 llvm::Value *Init = nullptr) {
2385   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2386   if (Init)
2387     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2388   return LVal;
2389 }
2390 
2391 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2392   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
2393   auto *CS = dyn_cast<CompoundStmt>(Stmt);
2394   bool HasLastprivates = false;
2395   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
2396                                                     PrePostActionTy &) {
2397     auto &C = CGF.CGM.getContext();
2398     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2399     // Emit helper vars inits.
2400     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2401                                   CGF.Builder.getInt32(0));
2402     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2403                                       : CGF.Builder.getInt32(0);
2404     LValue UB =
2405         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2406     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2407                                   CGF.Builder.getInt32(1));
2408     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2409                                   CGF.Builder.getInt32(0));
2410     // Loop counter.
2411     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2412     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2413     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2414     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2415     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2416     // Generate condition for loop.
2417     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2418                         OK_Ordinary, S.getLocStart(), FPOptions());
2419     // Increment for loop counter.
2420     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2421                       S.getLocStart());
2422     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2423       // Iterate through all sections and emit a switch construct:
2424       // switch (IV) {
2425       //   case 0:
2426       //     <SectionStmt[0]>;
2427       //     break;
2428       // ...
2429       //   case <NumSection> - 1:
2430       //     <SectionStmt[<NumSection> - 1]>;
2431       //     break;
2432       // }
2433       // .omp.sections.exit:
2434       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2435       auto *SwitchStmt = CGF.Builder.CreateSwitch(
2436           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2437           CS == nullptr ? 1 : CS->size());
2438       if (CS) {
2439         unsigned CaseNumber = 0;
2440         for (auto *SubStmt : CS->children()) {
2441           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2442           CGF.EmitBlock(CaseBB);
2443           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2444           CGF.EmitStmt(SubStmt);
2445           CGF.EmitBranch(ExitBB);
2446           ++CaseNumber;
2447         }
2448       } else {
2449         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2450         CGF.EmitBlock(CaseBB);
2451         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2452         CGF.EmitStmt(Stmt);
2453         CGF.EmitBranch(ExitBB);
2454       }
2455       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2456     };
2457 
2458     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2459     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2460       // Emit implicit barrier to synchronize threads and avoid data races on
2461       // initialization of firstprivate variables and post-update of lastprivate
2462       // variables.
2463       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2464           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2465           /*ForceSimpleCall=*/true);
2466     }
2467     CGF.EmitOMPPrivateClause(S, LoopScope);
2468     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2469     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2470     (void)LoopScope.Privatize();
2471 
2472     // Emit static non-chunked loop.
2473     OpenMPScheduleTy ScheduleKind;
2474     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2475     CGOpenMPRuntime::StaticRTInput StaticInit(
2476         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
2477         LB.getAddress(), UB.getAddress(), ST.getAddress());
2478     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2479         CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
2480     // UB = min(UB, GlobalUB);
2481     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2482     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2483         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2484     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2485     // IV = LB;
2486     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2487     // while (idx <= UB) { BODY; ++idx; }
2488     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2489                          [](CodeGenFunction &) {});
2490     // Tell the runtime we are done.
2491     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2492       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2493                                                      S.getDirectiveKind());
2494     };
2495     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2496     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2497     // Emit post-update of the reduction variables if IsLastIter != 0.
2498     emitPostUpdateForReductionClause(
2499         CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2500           return CGF.Builder.CreateIsNotNull(
2501               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2502         });
2503 
2504     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2505     if (HasLastprivates)
2506       CGF.EmitOMPLastprivateClauseFinal(
2507           S, /*NoFinals=*/false,
2508           CGF.Builder.CreateIsNotNull(
2509               CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2510   };
2511 
2512   bool HasCancel = false;
2513   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2514     HasCancel = OSD->hasCancel();
2515   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2516     HasCancel = OPSD->hasCancel();
2517   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2518   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2519                                               HasCancel);
2520   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2521   // clause. Otherwise the barrier will be generated by the codegen for the
2522   // directive.
2523   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2524     // Emit implicit barrier to synchronize threads and avoid data races on
2525     // initialization of firstprivate variables.
2526     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2527                                            OMPD_unknown);
2528   }
2529 }
2530 
2531 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2532   {
2533     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2534     EmitSections(S);
2535   }
2536   // Emit an implicit barrier at the end.
2537   if (!S.getSingleClause<OMPNowaitClause>()) {
2538     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2539                                            OMPD_sections);
2540   }
2541 }
2542 
2543 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2544   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2545     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2546   };
2547   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2548   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2549                                               S.hasCancel());
2550 }
2551 
2552 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2553   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2554   llvm::SmallVector<const Expr *, 8> DestExprs;
2555   llvm::SmallVector<const Expr *, 8> SrcExprs;
2556   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2557   // Check if there are any 'copyprivate' clauses associated with this
2558   // 'single' construct.
2559   // Build a list of copyprivate variables along with helper expressions
2560   // (<source>, <destination>, <destination>=<source> expressions)
2561   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2562     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2563     DestExprs.append(C->destination_exprs().begin(),
2564                      C->destination_exprs().end());
2565     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2566     AssignmentOps.append(C->assignment_ops().begin(),
2567                          C->assignment_ops().end());
2568   }
2569   // Emit code for 'single' region along with 'copyprivate' clauses
2570   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2571     Action.Enter(CGF);
2572     OMPPrivateScope SingleScope(CGF);
2573     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2574     CGF.EmitOMPPrivateClause(S, SingleScope);
2575     (void)SingleScope.Privatize();
2576     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2577   };
2578   {
2579     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2580     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2581                                             CopyprivateVars, DestExprs,
2582                                             SrcExprs, AssignmentOps);
2583   }
2584   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2585   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2586   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2587     CGM.getOpenMPRuntime().emitBarrierCall(
2588         *this, S.getLocStart(),
2589         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2590   }
2591 }
2592 
2593 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2594   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2595     Action.Enter(CGF);
2596     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2597   };
2598   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2599   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2600 }
2601 
2602 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2603   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2604     Action.Enter(CGF);
2605     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2606   };
2607   Expr *Hint = nullptr;
2608   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2609     Hint = HintClause->getHint();
2610   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2611   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2612                                             S.getDirectiveName().getAsString(),
2613                                             CodeGen, S.getLocStart(), Hint);
2614 }
2615 
2616 void CodeGenFunction::EmitOMPParallelForDirective(
2617     const OMPParallelForDirective &S) {
2618   // Emit directive as a combined directive that consists of two implicit
2619   // directives: 'parallel' with 'for' directive.
2620   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2621     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2622     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2623                                emitDispatchForLoopBounds);
2624   };
2625   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2626                                  emitEmptyBoundParameters);
2627 }
2628 
2629 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2630     const OMPParallelForSimdDirective &S) {
2631   // Emit directive as a combined directive that consists of two implicit
2632   // directives: 'parallel' with 'for' directive.
2633   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2634     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2635                                emitDispatchForLoopBounds);
2636   };
2637   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2638                                  emitEmptyBoundParameters);
2639 }
2640 
2641 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2642     const OMPParallelSectionsDirective &S) {
2643   // Emit directive as a combined directive that consists of two implicit
2644   // directives: 'parallel' with 'sections' directive.
2645   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2646     CGF.EmitSections(S);
2647   };
2648   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2649                                  emitEmptyBoundParameters);
2650 }
2651 
2652 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2653                                                 const RegionCodeGenTy &BodyGen,
2654                                                 const TaskGenTy &TaskGen,
2655                                                 OMPTaskDataTy &Data) {
2656   // Emit outlined function for task construct.
2657   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2658   auto *I = CS->getCapturedDecl()->param_begin();
2659   auto *PartId = std::next(I);
2660   auto *TaskT = std::next(I, 4);
2661   // Check if the task is final
2662   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2663     // If the condition constant folds and can be elided, try to avoid emitting
2664     // the condition and the dead arm of the if/else.
2665     auto *Cond = Clause->getCondition();
2666     bool CondConstant;
2667     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2668       Data.Final.setInt(CondConstant);
2669     else
2670       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2671   } else {
2672     // By default the task is not final.
2673     Data.Final.setInt(/*IntVal=*/false);
2674   }
2675   // Check if the task has 'priority' clause.
2676   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2677     auto *Prio = Clause->getPriority();
2678     Data.Priority.setInt(/*IntVal=*/true);
2679     Data.Priority.setPointer(EmitScalarConversion(
2680         EmitScalarExpr(Prio), Prio->getType(),
2681         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2682         Prio->getExprLoc()));
2683   }
2684   // The first function argument for tasks is a thread id, the second one is a
2685   // part id (0 for tied tasks, >=0 for untied task).
2686   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2687   // Get list of private variables.
2688   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2689     auto IRef = C->varlist_begin();
2690     for (auto *IInit : C->private_copies()) {
2691       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2692       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2693         Data.PrivateVars.push_back(*IRef);
2694         Data.PrivateCopies.push_back(IInit);
2695       }
2696       ++IRef;
2697     }
2698   }
2699   EmittedAsPrivate.clear();
2700   // Get list of firstprivate variables.
2701   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2702     auto IRef = C->varlist_begin();
2703     auto IElemInitRef = C->inits().begin();
2704     for (auto *IInit : C->private_copies()) {
2705       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2706       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2707         Data.FirstprivateVars.push_back(*IRef);
2708         Data.FirstprivateCopies.push_back(IInit);
2709         Data.FirstprivateInits.push_back(*IElemInitRef);
2710       }
2711       ++IRef;
2712       ++IElemInitRef;
2713     }
2714   }
2715   // Get list of lastprivate variables (for taskloops).
2716   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2717   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2718     auto IRef = C->varlist_begin();
2719     auto ID = C->destination_exprs().begin();
2720     for (auto *IInit : C->private_copies()) {
2721       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2722       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2723         Data.LastprivateVars.push_back(*IRef);
2724         Data.LastprivateCopies.push_back(IInit);
2725       }
2726       LastprivateDstsOrigs.insert(
2727           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2728            cast<DeclRefExpr>(*IRef)});
2729       ++IRef;
2730       ++ID;
2731     }
2732   }
2733   SmallVector<const Expr *, 4> LHSs;
2734   SmallVector<const Expr *, 4> RHSs;
2735   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2736     auto IPriv = C->privates().begin();
2737     auto IRed = C->reduction_ops().begin();
2738     auto ILHS = C->lhs_exprs().begin();
2739     auto IRHS = C->rhs_exprs().begin();
2740     for (const auto *Ref : C->varlists()) {
2741       Data.ReductionVars.emplace_back(Ref);
2742       Data.ReductionCopies.emplace_back(*IPriv);
2743       Data.ReductionOps.emplace_back(*IRed);
2744       LHSs.emplace_back(*ILHS);
2745       RHSs.emplace_back(*IRHS);
2746       std::advance(IPriv, 1);
2747       std::advance(IRed, 1);
2748       std::advance(ILHS, 1);
2749       std::advance(IRHS, 1);
2750     }
2751   }
2752   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2753       *this, S.getLocStart(), LHSs, RHSs, Data);
2754   // Build list of dependences.
2755   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2756     for (auto *IRef : C->varlists())
2757       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2758   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2759       CodeGenFunction &CGF, PrePostActionTy &Action) {
2760     // Set proper addresses for generated private copies.
2761     OMPPrivateScope Scope(CGF);
2762     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2763         !Data.LastprivateVars.empty()) {
2764       enum { PrivatesParam = 2, CopyFnParam = 3 };
2765       auto *CopyFn = CGF.Builder.CreateLoad(
2766           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2767       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2768           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2769       // Map privates.
2770       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2771       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2772       CallArgs.push_back(PrivatesPtr);
2773       for (auto *E : Data.PrivateVars) {
2774         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2775         Address PrivatePtr = CGF.CreateMemTemp(
2776             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2777         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2778         CallArgs.push_back(PrivatePtr.getPointer());
2779       }
2780       for (auto *E : Data.FirstprivateVars) {
2781         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2782         Address PrivatePtr =
2783             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2784                               ".firstpriv.ptr.addr");
2785         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2786         CallArgs.push_back(PrivatePtr.getPointer());
2787       }
2788       for (auto *E : Data.LastprivateVars) {
2789         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2790         Address PrivatePtr =
2791             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2792                               ".lastpriv.ptr.addr");
2793         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2794         CallArgs.push_back(PrivatePtr.getPointer());
2795       }
2796       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2797                                                           CopyFn, CallArgs);
2798       for (auto &&Pair : LastprivateDstsOrigs) {
2799         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2800         DeclRefExpr DRE(
2801             const_cast<VarDecl *>(OrigVD),
2802             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2803                 OrigVD) != nullptr,
2804             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2805         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2806           return CGF.EmitLValue(&DRE).getAddress();
2807         });
2808       }
2809       for (auto &&Pair : PrivatePtrs) {
2810         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2811                             CGF.getContext().getDeclAlign(Pair.first));
2812         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2813       }
2814     }
2815     if (Data.Reductions) {
2816       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2817       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2818                              Data.ReductionOps);
2819       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2820           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2821       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2822         RedCG.emitSharedLValue(CGF, Cnt);
2823         RedCG.emitAggregateType(CGF, Cnt);
2824         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2825             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2826         Replacement =
2827             Address(CGF.EmitScalarConversion(
2828                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2829                         CGF.getContext().getPointerType(
2830                             Data.ReductionCopies[Cnt]->getType()),
2831                         SourceLocation()),
2832                     Replacement.getAlignment());
2833         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2834         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2835                          [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
2839         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2840                                                            RedCG, Cnt);
2841       }
2842     }
2843     // Privatize all private variables except for in_reduction items.
2844     (void)Scope.Privatize();
2845     SmallVector<const Expr *, 4> InRedVars;
2846     SmallVector<const Expr *, 4> InRedPrivs;
2847     SmallVector<const Expr *, 4> InRedOps;
2848     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2849     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2850       auto IPriv = C->privates().begin();
2851       auto IRed = C->reduction_ops().begin();
2852       auto ITD = C->taskgroup_descriptors().begin();
2853       for (const auto *Ref : C->varlists()) {
2854         InRedVars.emplace_back(Ref);
2855         InRedPrivs.emplace_back(*IPriv);
2856         InRedOps.emplace_back(*IRed);
2857         TaskgroupDescriptors.emplace_back(*ITD);
2858         std::advance(IPriv, 1);
2859         std::advance(IRed, 1);
2860         std::advance(ITD, 1);
2861       }
2862     }
2863     // Privatize in_reduction items here, because taskgroup descriptors must be
2864     // privatized earlier.
2865     OMPPrivateScope InRedScope(CGF);
2866     if (!InRedVars.empty()) {
2867       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2868       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2869         RedCG.emitSharedLValue(CGF, Cnt);
2870         RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
2873         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2874             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2875         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2876             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2877         Replacement = Address(
2878             CGF.EmitScalarConversion(
2879                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2880                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2881                 SourceLocation()),
2882             Replacement.getAlignment());
2883         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2884         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2885                               [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
2889         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2890                                                            RedCG, Cnt);
2891       }
2892     }
2893     (void)InRedScope.Privatize();
2894 
2895     Action.Enter(CGF);
2896     BodyGen(CGF);
2897   };
2898   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2899       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2900       Data.NumberOfParts);
2901   OMPLexicalScope Scope(*this, S);
2902   TaskGen(*this, OutlinedFn, Data);
2903 }
2904 
2905 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2906   // Emit outlined function for task construct.
2907   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2908   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2909   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2910   const Expr *IfCond = nullptr;
2911   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2912     if (C->getNameModifier() == OMPD_unknown ||
2913         C->getNameModifier() == OMPD_task) {
2914       IfCond = C->getCondition();
2915       break;
2916     }
2917   }
2918 
2919   OMPTaskDataTy Data;
2920   // Check if we should emit tied or untied task.
2921   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2922   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2923     CGF.EmitStmt(CS->getCapturedStmt());
2924   };
2925   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2926                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2927                             const OMPTaskDataTy &Data) {
2928     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2929                                             SharedsTy, CapturedStruct, IfCond,
2930                                             Data);
2931   };
2932   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2933 }
2934 
2935 void CodeGenFunction::EmitOMPTaskyieldDirective(
2936     const OMPTaskyieldDirective &S) {
2937   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2938 }
2939 
2940 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2941   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2942 }
2943 
2944 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2945   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2946 }
2947 
2948 void CodeGenFunction::EmitOMPTaskgroupDirective(
2949     const OMPTaskgroupDirective &S) {
2950   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2951     Action.Enter(CGF);
2952     if (const Expr *E = S.getReductionRef()) {
2953       SmallVector<const Expr *, 4> LHSs;
2954       SmallVector<const Expr *, 4> RHSs;
2955       OMPTaskDataTy Data;
2956       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2957         auto IPriv = C->privates().begin();
2958         auto IRed = C->reduction_ops().begin();
2959         auto ILHS = C->lhs_exprs().begin();
2960         auto IRHS = C->rhs_exprs().begin();
2961         for (const auto *Ref : C->varlists()) {
2962           Data.ReductionVars.emplace_back(Ref);
2963           Data.ReductionCopies.emplace_back(*IPriv);
2964           Data.ReductionOps.emplace_back(*IRed);
2965           LHSs.emplace_back(*ILHS);
2966           RHSs.emplace_back(*IRHS);
2967           std::advance(IPriv, 1);
2968           std::advance(IRed, 1);
2969           std::advance(ILHS, 1);
2970           std::advance(IRHS, 1);
2971         }
2972       }
2973       llvm::Value *ReductionDesc =
2974           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
2975                                                            LHSs, RHSs, Data);
2976       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2977       CGF.EmitVarDecl(*VD);
2978       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
2979                             /*Volatile=*/false, E->getType());
2980     }
2981     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2982   };
2983   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2984   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2985 }
2986 
2987 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2988   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2989     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2990       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2991                                 FlushClause->varlist_end());
2992     }
2993     return llvm::None;
2994   }(), S.getLocStart());
2995 }
2996 
/// Emit the loop of a 'distribute'-based loop directive.
///
/// The emission proceeds in this order: the iteration variable and (when it is
/// a variable) the last-iteration count, the loop precondition, the helper
/// variables (lower bound, upper bound, stride, is-last flag), privatization
/// of firstprivate/private/lastprivate variables and loop counters, and
/// finally the loop itself. For a static non-chunked schedule the loop is
/// emitted inline between a distribute static-init call and a static-finish
/// call; otherwise emission is delegated to EmitOMPDistributeOuterLoop.
///
/// \param S The loop directive being emitted.
/// \param CodeGenLoop Callback that emits the loop body; invoked from the
/// inner loop on the static non-chunked path and forwarded to
/// EmitOMPDistributeOuterLoop otherwise.
/// \param IncExpr Increment expression passed to EmitOMPInnerLoop on the
/// static non-chunked path.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant, in which case no
    // branch to a continuation block is emitted at the end.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.

      // Directives that share loop bounds use the combined lower/upper bound
      // variables instead of the plain ones.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // The chunk size is converted to the iteration variable's type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress());
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // for distribute alone,  codegen
        // while (idx <= UB) { BODY; ++idx; }
        // when combined with 'for' (e.g. as in 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(
                EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
3152 
3153 void CodeGenFunction::EmitOMPDistributeDirective(
3154     const OMPDistributeDirective &S) {
3155   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3156 
3157     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3158   };
3159   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3160   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3161                                               false);
3162 }
3163 
3164 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3165                                                    const CapturedStmt *S) {
3166   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3167   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3168   CGF.CapturedStmtInfo = &CapStmtInfo;
3169   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3170   Fn->addFnAttr(llvm::Attribute::NoInline);
3171   return Fn;
3172 }
3173 
3174 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3175   if (!S.getAssociatedStmt()) {
3176     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3177       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3178     return;
3179   }
3180   auto *C = S.getSingleClause<OMPSIMDClause>();
3181   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3182                                  PrePostActionTy &Action) {
3183     if (C) {
3184       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3185       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3186       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3187       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3188       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3189                                                       OutlinedFn, CapturedVars);
3190     } else {
3191       Action.Enter(CGF);
3192       CGF.EmitStmt(
3193           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3194     }
3195   };
3196   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3197   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3198 }
3199 
3200 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3201                                          QualType SrcType, QualType DestType,
3202                                          SourceLocation Loc) {
3203   assert(CGF.hasScalarEvaluationKind(DestType) &&
3204          "DestType must have scalar evaluation kind.");
3205   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3206   return Val.isScalar()
3207              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3208                                         Loc)
3209              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3210                                                  DestType, Loc);
3211 }
3212 
3213 static CodeGenFunction::ComplexPairTy
3214 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3215                       QualType DestType, SourceLocation Loc) {
3216   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3217          "DestType must have complex evaluation kind.");
3218   CodeGenFunction::ComplexPairTy ComplexVal;
3219   if (Val.isScalar()) {
3220     // Convert the input element to the element type of the complex.
3221     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3222     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3223                                               DestElementType, Loc);
3224     ComplexVal = CodeGenFunction::ComplexPairTy(
3225         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3226   } else {
3227     assert(Val.isComplex() && "Must be a scalar or complex.");
3228     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3229     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3230     ComplexVal.first = CGF.EmitScalarConversion(
3231         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3232     ComplexVal.second = CGF.EmitScalarConversion(
3233         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3234   }
3235   return ComplexVal;
3236 }
3237 
3238 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3239                                   LValue LVal, RValue RVal) {
3240   if (LVal.isGlobalReg()) {
3241     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3242   } else {
3243     CGF.EmitAtomicStore(RVal, LVal,
3244                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3245                                  : llvm::AtomicOrdering::Monotonic,
3246                         LVal.isVolatile(), /*IsInit=*/false);
3247   }
3248 }
3249 
3250 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3251                                          QualType RValTy, SourceLocation Loc) {
3252   switch (getEvaluationKind(LVal.getType())) {
3253   case TEK_Scalar:
3254     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3255                                *this, RVal, RValTy, LVal.getType(), Loc)),
3256                            LVal);
3257     break;
3258   case TEK_Complex:
3259     EmitStoreOfComplex(
3260         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3261         /*isInit=*/false);
3262     break;
3263   case TEK_Aggregate:
3264     llvm_unreachable("Must be a scalar or complex.");
3265   }
3266 }
3267 
3268 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3269                                   const Expr *X, const Expr *V,
3270                                   SourceLocation Loc) {
3271   // v = x;
3272   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3273   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3274   LValue XLValue = CGF.EmitLValue(X);
3275   LValue VLValue = CGF.EmitLValue(V);
3276   RValue Res = XLValue.isGlobalReg()
3277                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3278                    : CGF.EmitAtomicLoad(
3279                          XLValue, Loc,
3280                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3281                                   : llvm::AtomicOrdering::Monotonic,
3282                          XLValue.isVolatile());
3283   // OpenMP, 2.12.6, atomic Construct
3284   // Any atomic construct with a seq_cst clause forces the atomically
3285   // performed operation to include an implicit flush operation without a
3286   // list.
3287   if (IsSeqCst)
3288     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3289   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3290 }
3291 
3292 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3293                                    const Expr *X, const Expr *E,
3294                                    SourceLocation Loc) {
3295   // x = expr;
3296   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3297   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3298   // OpenMP, 2.12.6, atomic Construct
3299   // Any atomic construct with a seq_cst clause forces the atomically
3300   // performed operation to include an implicit flush operation without a
3301   // list.
3302   if (IsSeqCst)
3303     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3304 }
3305 
3306 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3307                                                 RValue Update,
3308                                                 BinaryOperatorKind BO,
3309                                                 llvm::AtomicOrdering AO,
3310                                                 bool IsXLHSInRHSPart) {
3311   auto &Context = CGF.CGM.getContext();
3312   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3313   // expression is simple and atomic is allowed for the given type for the
3314   // target platform.
3315   if (BO == BO_Comma || !Update.isScalar() ||
3316       !Update.getScalarVal()->getType()->isIntegerTy() ||
3317       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3318                         (Update.getScalarVal()->getType() !=
3319                          X.getAddress().getElementType())) ||
3320       !X.getAddress().getElementType()->isIntegerTy() ||
3321       !Context.getTargetInfo().hasBuiltinAtomic(
3322           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3323     return std::make_pair(false, RValue::get(nullptr));
3324 
3325   llvm::AtomicRMWInst::BinOp RMWOp;
3326   switch (BO) {
3327   case BO_Add:
3328     RMWOp = llvm::AtomicRMWInst::Add;
3329     break;
3330   case BO_Sub:
3331     if (!IsXLHSInRHSPart)
3332       return std::make_pair(false, RValue::get(nullptr));
3333     RMWOp = llvm::AtomicRMWInst::Sub;
3334     break;
3335   case BO_And:
3336     RMWOp = llvm::AtomicRMWInst::And;
3337     break;
3338   case BO_Or:
3339     RMWOp = llvm::AtomicRMWInst::Or;
3340     break;
3341   case BO_Xor:
3342     RMWOp = llvm::AtomicRMWInst::Xor;
3343     break;
3344   case BO_LT:
3345     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3346                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3347                                    : llvm::AtomicRMWInst::Max)
3348                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3349                                    : llvm::AtomicRMWInst::UMax);
3350     break;
3351   case BO_GT:
3352     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3353                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3354                                    : llvm::AtomicRMWInst::Min)
3355                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3356                                    : llvm::AtomicRMWInst::UMin);
3357     break;
3358   case BO_Assign:
3359     RMWOp = llvm::AtomicRMWInst::Xchg;
3360     break;
3361   case BO_Mul:
3362   case BO_Div:
3363   case BO_Rem:
3364   case BO_Shl:
3365   case BO_Shr:
3366   case BO_LAnd:
3367   case BO_LOr:
3368     return std::make_pair(false, RValue::get(nullptr));
3369   case BO_PtrMemD:
3370   case BO_PtrMemI:
3371   case BO_LE:
3372   case BO_GE:
3373   case BO_EQ:
3374   case BO_NE:
3375   case BO_AddAssign:
3376   case BO_SubAssign:
3377   case BO_AndAssign:
3378   case BO_OrAssign:
3379   case BO_XorAssign:
3380   case BO_MulAssign:
3381   case BO_DivAssign:
3382   case BO_RemAssign:
3383   case BO_ShlAssign:
3384   case BO_ShrAssign:
3385   case BO_Comma:
3386     llvm_unreachable("Unsupported atomic update operation");
3387   }
3388   auto *UpdateVal = Update.getScalarVal();
3389   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3390     UpdateVal = CGF.Builder.CreateIntCast(
3391         IC, X.getAddress().getElementType(),
3392         X.getType()->hasSignedIntegerRepresentation());
3393   }
3394   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3395   return std::make_pair(true, RValue::get(Res));
3396 }
3397 
3398 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3399     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3400     llvm::AtomicOrdering AO, SourceLocation Loc,
3401     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3402   // Update expressions are allowed to have the following forms:
3403   // x binop= expr; -> xrval + expr;
3404   // x++, ++x -> xrval + 1;
3405   // x--, --x -> xrval - 1;
3406   // x = x binop expr; -> xrval binop expr
3407   // x = expr Op x; - > expr binop xrval;
3408   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3409   if (!Res.first) {
3410     if (X.isGlobalReg()) {
3411       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3412       // 'xrval'.
3413       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3414     } else {
3415       // Perform compare-and-swap procedure.
3416       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3417     }
3418   }
3419   return Res;
3420 }
3421 
3422 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3423                                     const Expr *X, const Expr *E,
3424                                     const Expr *UE, bool IsXLHSInRHSPart,
3425                                     SourceLocation Loc) {
3426   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3427          "Update expr in 'atomic update' must be a binary operator.");
3428   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3429   // Update expressions are allowed to have the following forms:
3430   // x binop= expr; -> xrval + expr;
3431   // x++, ++x -> xrval + 1;
3432   // x--, --x -> xrval - 1;
3433   // x = x binop expr; -> xrval binop expr
3434   // x = expr Op x; - > expr binop xrval;
3435   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3436   LValue XLValue = CGF.EmitLValue(X);
3437   RValue ExprRValue = CGF.EmitAnyExpr(E);
3438   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3439                      : llvm::AtomicOrdering::Monotonic;
3440   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3441   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3442   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3443   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3444   auto Gen =
3445       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3446         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3447         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3448         return CGF.EmitAnyExpr(UE);
3449       };
3450   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3451       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3452   // OpenMP, 2.12.6, atomic Construct
3453   // Any atomic construct with a seq_cst clause forces the atomically
3454   // performed operation to include an implicit flush operation without a
3455   // list.
3456   if (IsSeqCst)
3457     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3458 }
3459 
3460 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3461                             QualType SourceType, QualType ResType,
3462                             SourceLocation Loc) {
3463   switch (CGF.getEvaluationKind(ResType)) {
3464   case TEK_Scalar:
3465     return RValue::get(
3466         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3467   case TEK_Complex: {
3468     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3469     return RValue::getComplex(Res.first, Res.second);
3470   }
3471   case TEK_Aggregate:
3472     break;
3473   }
3474   llvm_unreachable("Must be a scalar or complex.");
3475 }
3476 
3477 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3478                                      bool IsPostfixUpdate, const Expr *V,
3479                                      const Expr *X, const Expr *E,
3480                                      const Expr *UE, bool IsXLHSInRHSPart,
3481                                      SourceLocation Loc) {
3482   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3483   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3484   RValue NewVVal;
3485   LValue VLValue = CGF.EmitLValue(V);
3486   LValue XLValue = CGF.EmitLValue(X);
3487   RValue ExprRValue = CGF.EmitAnyExpr(E);
3488   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3489                      : llvm::AtomicOrdering::Monotonic;
3490   QualType NewVValType;
3491   if (UE) {
3492     // 'x' is updated with some additional value.
3493     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3494            "Update expr in 'atomic capture' must be a binary operator.");
3495     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3496     // Update expressions are allowed to have the following forms:
3497     // x binop= expr; -> xrval + expr;
3498     // x++, ++x -> xrval + 1;
3499     // x--, --x -> xrval - 1;
3500     // x = x binop expr; -> xrval binop expr
3501     // x = expr Op x; - > expr binop xrval;
3502     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3503     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3504     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3505     NewVValType = XRValExpr->getType();
3506     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3507     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3508                   IsPostfixUpdate](RValue XRValue) -> RValue {
3509       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3510       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3511       RValue Res = CGF.EmitAnyExpr(UE);
3512       NewVVal = IsPostfixUpdate ? XRValue : Res;
3513       return Res;
3514     };
3515     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3516         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3517     if (Res.first) {
3518       // 'atomicrmw' instruction was generated.
3519       if (IsPostfixUpdate) {
3520         // Use old value from 'atomicrmw'.
3521         NewVVal = Res.second;
3522       } else {
3523         // 'atomicrmw' does not provide new value, so evaluate it using old
3524         // value of 'x'.
3525         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3526         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3527         NewVVal = CGF.EmitAnyExpr(UE);
3528       }
3529     }
3530   } else {
3531     // 'x' is simply rewritten with some 'expr'.
3532     NewVValType = X->getType().getNonReferenceType();
3533     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3534                                X->getType().getNonReferenceType(), Loc);
3535     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3536       NewVVal = XRValue;
3537       return ExprRValue;
3538     };
3539     // Try to perform atomicrmw xchg, otherwise simple exchange.
3540     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3541         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3542         Loc, Gen);
3543     if (Res.first) {
3544       // 'atomicrmw' instruction was generated.
3545       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3546     }
3547   }
3548   // Emit post-update store to 'v' of old/new 'x' value.
3549   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3550   // OpenMP, 2.12.6, atomic Construct
3551   // Any atomic construct with a seq_cst clause forces the atomically
3552   // performed operation to include an implicit flush operation without a
3553   // list.
3554   if (IsSeqCst)
3555     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3556 }
3557 
3558 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3559                               bool IsSeqCst, bool IsPostfixUpdate,
3560                               const Expr *X, const Expr *V, const Expr *E,
3561                               const Expr *UE, bool IsXLHSInRHSPart,
3562                               SourceLocation Loc) {
3563   switch (Kind) {
3564   case OMPC_read:
3565     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3566     break;
3567   case OMPC_write:
3568     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3569     break;
3570   case OMPC_unknown:
3571   case OMPC_update:
3572     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3573     break;
3574   case OMPC_capture:
3575     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3576                              IsXLHSInRHSPart, Loc);
3577     break;
3578   case OMPC_if:
3579   case OMPC_final:
3580   case OMPC_num_threads:
3581   case OMPC_private:
3582   case OMPC_firstprivate:
3583   case OMPC_lastprivate:
3584   case OMPC_reduction:
3585   case OMPC_task_reduction:
3586   case OMPC_in_reduction:
3587   case OMPC_safelen:
3588   case OMPC_simdlen:
3589   case OMPC_collapse:
3590   case OMPC_default:
3591   case OMPC_seq_cst:
3592   case OMPC_shared:
3593   case OMPC_linear:
3594   case OMPC_aligned:
3595   case OMPC_copyin:
3596   case OMPC_copyprivate:
3597   case OMPC_flush:
3598   case OMPC_proc_bind:
3599   case OMPC_schedule:
3600   case OMPC_ordered:
3601   case OMPC_nowait:
3602   case OMPC_untied:
3603   case OMPC_threadprivate:
3604   case OMPC_depend:
3605   case OMPC_mergeable:
3606   case OMPC_device:
3607   case OMPC_threads:
3608   case OMPC_simd:
3609   case OMPC_map:
3610   case OMPC_num_teams:
3611   case OMPC_thread_limit:
3612   case OMPC_priority:
3613   case OMPC_grainsize:
3614   case OMPC_nogroup:
3615   case OMPC_num_tasks:
3616   case OMPC_hint:
3617   case OMPC_dist_schedule:
3618   case OMPC_defaultmap:
3619   case OMPC_uniform:
3620   case OMPC_to:
3621   case OMPC_from:
3622   case OMPC_use_device_ptr:
3623   case OMPC_is_device_ptr:
3624     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3625   }
3626 }
3627 
3628 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3629   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3630   OpenMPClauseKind Kind = OMPC_unknown;
3631   for (auto *C : S.clauses()) {
3632     // Find first clause (skip seq_cst clause, if it is first).
3633     if (C->getClauseKind() != OMPC_seq_cst) {
3634       Kind = C->getClauseKind();
3635       break;
3636     }
3637   }
3638 
3639   const auto *CS =
3640       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3641   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3642     enterFullExpression(EWC);
3643   }
3644   // Processing for statements under 'atomic capture'.
3645   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3646     for (const auto *C : Compound->body()) {
3647       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3648         enterFullExpression(EWC);
3649       }
3650     }
3651   }
3652 
3653   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3654                                             PrePostActionTy &) {
3655     CGF.EmitStopPoint(CS);
3656     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3657                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3658                       S.isXLHSInRHSPart(), S.getLocStart());
3659   };
3660   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3661   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3662 }
3663 
3664 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3665                                          const OMPExecutableDirective &S,
3666                                          const RegionCodeGenTy &CodeGen) {
3667   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3668   CodeGenModule &CGM = CGF.CGM;
3669   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3670 
3671   llvm::Function *Fn = nullptr;
3672   llvm::Constant *FnID = nullptr;
3673 
3674   const Expr *IfCond = nullptr;
3675   // Check for the at most one if clause associated with the target region.
3676   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3677     if (C->getNameModifier() == OMPD_unknown ||
3678         C->getNameModifier() == OMPD_target) {
3679       IfCond = C->getCondition();
3680       break;
3681     }
3682   }
3683 
3684   // Check if we have any device clause associated with the directive.
3685   const Expr *Device = nullptr;
3686   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3687     Device = C->getDevice();
3688   }
3689 
3690   // Check if we have an if clause whose conditional always evaluates to false
3691   // or if we do not have any targets specified. If so the target region is not
3692   // an offload entry point.
3693   bool IsOffloadEntry = true;
3694   if (IfCond) {
3695     bool Val;
3696     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3697       IsOffloadEntry = false;
3698   }
3699   if (CGM.getLangOpts().OMPTargetTriples.empty())
3700     IsOffloadEntry = false;
3701 
3702   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3703   StringRef ParentName;
3704   // In case we have Ctors/Dtors we use the complete type variant to produce
3705   // the mangling of the device outlined kernel.
3706   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3707     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3708   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3709     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3710   else
3711     ParentName =
3712         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3713 
3714   // Emit target region as a standalone region.
3715   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3716                                                     IsOffloadEntry, CodeGen);
3717   OMPLexicalScope Scope(CGF, S);
3718   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3719   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3720   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3721                                         CapturedVars);
3722 }
3723 
3724 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3725                              PrePostActionTy &Action) {
3726   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3727   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3728   CGF.EmitOMPPrivateClause(S, PrivateScope);
3729   (void)PrivateScope.Privatize();
3730 
3731   Action.Enter(CGF);
3732   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3733 }
3734 
3735 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3736                                                   StringRef ParentName,
3737                                                   const OMPTargetDirective &S) {
3738   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3739     emitTargetRegion(CGF, S, Action);
3740   };
3741   llvm::Function *Fn;
3742   llvm::Constant *Addr;
3743   // Emit target region as a standalone region.
3744   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3745       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3746   assert(Fn && Addr && "Target device function emission failed.");
3747 }
3748 
3749 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3750   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3751     emitTargetRegion(CGF, S, Action);
3752   };
3753   emitCommonOMPTargetDirective(*this, S, CodeGen);
3754 }
3755 
3756 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3757                                         const OMPExecutableDirective &S,
3758                                         OpenMPDirectiveKind InnermostKind,
3759                                         const RegionCodeGenTy &CodeGen) {
3760   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3761   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3762       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3763 
3764   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3765   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3766   if (NT || TL) {
3767     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3768     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3769 
3770     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3771                                                   S.getLocStart());
3772   }
3773 
3774   OMPTeamsScope Scope(CGF, S);
3775   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3776   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3777   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3778                                            CapturedVars);
3779 }
3780 
3781 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3782   // Emit teams region as a standalone region.
3783   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3784     OMPPrivateScope PrivateScope(CGF);
3785     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3786     CGF.EmitOMPPrivateClause(S, PrivateScope);
3787     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3788     (void)PrivateScope.Privatize();
3789     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3790     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3791   };
3792   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3793   emitPostUpdateForReductionClause(
3794       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3795 }
3796 
3797 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3798                                   const OMPTargetTeamsDirective &S) {
3799   auto *CS = S.getCapturedStmt(OMPD_teams);
3800   Action.Enter(CGF);
3801   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3802     // TODO: Add support for clauses.
3803     CGF.EmitStmt(CS->getCapturedStmt());
3804   };
3805   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3806 }
3807 
3808 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3809     CodeGenModule &CGM, StringRef ParentName,
3810     const OMPTargetTeamsDirective &S) {
3811   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3812     emitTargetTeamsRegion(CGF, Action, S);
3813   };
3814   llvm::Function *Fn;
3815   llvm::Constant *Addr;
3816   // Emit target region as a standalone region.
3817   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3818       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3819   assert(Fn && Addr && "Target device function emission failed.");
3820 }
3821 
3822 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3823     const OMPTargetTeamsDirective &S) {
3824   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3825     emitTargetTeamsRegion(CGF, Action, S);
3826   };
3827   emitCommonOMPTargetDirective(*this, S, CodeGen);
3828 }
3829 
3830 void CodeGenFunction::EmitOMPCancellationPointDirective(
3831     const OMPCancellationPointDirective &S) {
3832   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3833                                                    S.getCancelRegion());
3834 }
3835 
3836 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3837   const Expr *IfCond = nullptr;
3838   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3839     if (C->getNameModifier() == OMPD_unknown ||
3840         C->getNameModifier() == OMPD_cancel) {
3841       IfCond = C->getCondition();
3842       break;
3843     }
3844   }
3845   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3846                                         S.getCancelRegion());
3847 }
3848 
3849 CodeGenFunction::JumpDest
3850 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3851   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3852       Kind == OMPD_target_parallel)
3853     return ReturnBlock;
3854   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3855          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3856          Kind == OMPD_distribute_parallel_for ||
3857          Kind == OMPD_target_parallel_for);
3858   return OMPCancelStack.getExitBlock();
3859 }
3860 
3861 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3862     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3863     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3864   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3865   auto OrigVarIt = C.varlist_begin();
3866   auto InitIt = C.inits().begin();
3867   for (auto PvtVarIt : C.private_copies()) {
3868     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3869     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3870     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3871 
3872     // In order to identify the right initializer we need to match the
3873     // declaration used by the mapping logic. In some cases we may get
3874     // OMPCapturedExprDecl that refers to the original declaration.
3875     const ValueDecl *MatchingVD = OrigVD;
3876     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3877       // OMPCapturedExprDecl are used to privative fields of the current
3878       // structure.
3879       auto *ME = cast<MemberExpr>(OED->getInit());
3880       assert(isa<CXXThisExpr>(ME->getBase()) &&
3881              "Base should be the current struct!");
3882       MatchingVD = ME->getMemberDecl();
3883     }
3884 
3885     // If we don't have information about the current list item, move on to
3886     // the next one.
3887     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3888     if (InitAddrIt == CaptureDeviceAddrMap.end())
3889       continue;
3890 
3891     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3892       // Initialize the temporary initialization variable with the address we
3893       // get from the runtime library. We have to cast the source address
3894       // because it is always a void *. References are materialized in the
3895       // privatization scope, so the initialization here disregards the fact
3896       // the original variable is a reference.
3897       QualType AddrQTy =
3898           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3899       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3900       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3901       setAddrOfLocalVar(InitVD, InitAddr);
3902 
3903       // Emit private declaration, it will be initialized by the value we
3904       // declaration we just added to the local declarations map.
3905       EmitDecl(*PvtVD);
3906 
3907       // The initialization variables reached its purpose in the emission
3908       // ofthe previous declaration, so we don't need it anymore.
3909       LocalDeclMap.erase(InitVD);
3910 
3911       // Return the address of the private variable.
3912       return GetAddrOfLocalVar(PvtVD);
3913     });
3914     assert(IsRegistered && "firstprivate var already registered as private");
3915     // Silence the warning about unused variable.
3916     (void)IsRegistered;
3917 
3918     ++OrigVarIt;
3919     ++InitIt;
3920   }
3921 }
3922 
3923 // Generate the instructions for '#pragma omp target data' directive.
3924 void CodeGenFunction::EmitOMPTargetDataDirective(
3925     const OMPTargetDataDirective &S) {
3926   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
3927 
3928   // Create a pre/post action to signal the privatization of the device pointer.
3929   // This action can be replaced by the OpenMP runtime code generation to
3930   // deactivate privatization.
3931   bool PrivatizeDevicePointers = false;
3932   class DevicePointerPrivActionTy : public PrePostActionTy {
3933     bool &PrivatizeDevicePointers;
3934 
3935   public:
3936     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
3937         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
3938     void Enter(CodeGenFunction &CGF) override {
3939       PrivatizeDevicePointers = true;
3940     }
3941   };
3942   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
3943 
3944   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
3945       CodeGenFunction &CGF, PrePostActionTy &Action) {
3946     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3947       CGF.EmitStmt(
3948           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3949     };
3950 
3951     // Codegen that selects wheather to generate the privatization code or not.
3952     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
3953                           &InnermostCodeGen](CodeGenFunction &CGF,
3954                                              PrePostActionTy &Action) {
3955       RegionCodeGenTy RCG(InnermostCodeGen);
3956       PrivatizeDevicePointers = false;
3957 
3958       // Call the pre-action to change the status of PrivatizeDevicePointers if
3959       // needed.
3960       Action.Enter(CGF);
3961 
3962       if (PrivatizeDevicePointers) {
3963         OMPPrivateScope PrivateScope(CGF);
3964         // Emit all instances of the use_device_ptr clause.
3965         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
3966           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
3967                                         Info.CaptureDeviceAddrMap);
3968         (void)PrivateScope.Privatize();
3969         RCG(CGF);
3970       } else
3971         RCG(CGF);
3972     };
3973 
3974     // Forward the provided action to the privatization codegen.
3975     RegionCodeGenTy PrivRCG(PrivCodeGen);
3976     PrivRCG.setAction(Action);
3977 
3978     // Notwithstanding the body of the region is emitted as inlined directive,
3979     // we don't use an inline scope as changes in the references inside the
3980     // region are expected to be visible outside, so we do not privative them.
3981     OMPLexicalScope Scope(CGF, S);
3982     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
3983                                                     PrivRCG);
3984   };
3985 
3986   RegionCodeGenTy RCG(CodeGen);
3987 
3988   // If we don't have target devices, don't bother emitting the data mapping
3989   // code.
3990   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
3991     RCG(*this);
3992     return;
3993   }
3994 
3995   // Check if we have any if clause associated with the directive.
3996   const Expr *IfCond = nullptr;
3997   if (auto *C = S.getSingleClause<OMPIfClause>())
3998     IfCond = C->getCondition();
3999 
4000   // Check if we have any device clause associated with the directive.
4001   const Expr *Device = nullptr;
4002   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4003     Device = C->getDevice();
4004 
4005   // Set the action to signal privatization of device pointers.
4006   RCG.setAction(PrivAction);
4007 
4008   // Emit region code.
4009   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4010                                              Info);
4011 }
4012 
4013 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4014     const OMPTargetEnterDataDirective &S) {
4015   // If we don't have target devices, don't bother emitting the data mapping
4016   // code.
4017   if (CGM.getLangOpts().OMPTargetTriples.empty())
4018     return;
4019 
4020   // Check if we have any if clause associated with the directive.
4021   const Expr *IfCond = nullptr;
4022   if (auto *C = S.getSingleClause<OMPIfClause>())
4023     IfCond = C->getCondition();
4024 
4025   // Check if we have any device clause associated with the directive.
4026   const Expr *Device = nullptr;
4027   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4028     Device = C->getDevice();
4029 
4030   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4031 }
4032 
4033 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4034     const OMPTargetExitDataDirective &S) {
4035   // If we don't have target devices, don't bother emitting the data mapping
4036   // code.
4037   if (CGM.getLangOpts().OMPTargetTriples.empty())
4038     return;
4039 
4040   // Check if we have any if clause associated with the directive.
4041   const Expr *IfCond = nullptr;
4042   if (auto *C = S.getSingleClause<OMPIfClause>())
4043     IfCond = C->getCondition();
4044 
4045   // Check if we have any device clause associated with the directive.
4046   const Expr *Device = nullptr;
4047   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4048     Device = C->getDevice();
4049 
4050   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4051 }
4052 
4053 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4054                                      const OMPTargetParallelDirective &S,
4055                                      PrePostActionTy &Action) {
4056   // Get the captured statement associated with the 'parallel' region.
4057   auto *CS = S.getCapturedStmt(OMPD_parallel);
4058   Action.Enter(CGF);
4059   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4060     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4061     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4062     CGF.EmitOMPPrivateClause(S, PrivateScope);
4063     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4064     (void)PrivateScope.Privatize();
4065     // TODO: Add support for clauses.
4066     CGF.EmitStmt(CS->getCapturedStmt());
4067     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4068   };
4069   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4070                                  emitEmptyBoundParameters);
4071   emitPostUpdateForReductionClause(
4072       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4073 }
4074 
4075 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4076     CodeGenModule &CGM, StringRef ParentName,
4077     const OMPTargetParallelDirective &S) {
4078   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4079     emitTargetParallelRegion(CGF, S, Action);
4080   };
4081   llvm::Function *Fn;
4082   llvm::Constant *Addr;
4083   // Emit target region as a standalone region.
4084   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4085       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4086   assert(Fn && Addr && "Target device function emission failed.");
4087 }
4088 
4089 void CodeGenFunction::EmitOMPTargetParallelDirective(
4090     const OMPTargetParallelDirective &S) {
4091   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4092     emitTargetParallelRegion(CGF, S, Action);
4093   };
4094   emitCommonOMPTargetDirective(*this, S, CodeGen);
4095 }
4096 
4097 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4098     const OMPTargetParallelForDirective &S) {
4099   // TODO: codegen for target parallel for.
4100 }
4101 
4102 /// Emit a helper variable and return corresponding lvalue.
4103 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4104                      const ImplicitParamDecl *PVD,
4105                      CodeGenFunction::OMPPrivateScope &Privates) {
4106   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4107   Privates.addPrivate(
4108       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4109 }
4110 
4111 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4112   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4113   // Emit outlined function for task construct.
4114   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
4115   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4116   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4117   const Expr *IfCond = nullptr;
4118   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4119     if (C->getNameModifier() == OMPD_unknown ||
4120         C->getNameModifier() == OMPD_taskloop) {
4121       IfCond = C->getCondition();
4122       break;
4123     }
4124   }
4125 
4126   OMPTaskDataTy Data;
4127   // Check if taskloop must be emitted without taskgroup.
4128   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4129   // TODO: Check if we should emit tied or untied task.
4130   Data.Tied = true;
4131   // Set scheduling for taskloop
4132   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4133     // grainsize clause
4134     Data.Schedule.setInt(/*IntVal=*/false);
4135     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4136   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4137     // num_tasks clause
4138     Data.Schedule.setInt(/*IntVal=*/true);
4139     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4140   }
4141 
4142   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4143     // if (PreCond) {
4144     //   for (IV in 0..LastIteration) BODY;
4145     //   <Final counter/linear vars updates>;
4146     // }
4147     //
4148 
4149     // Emit: if (PreCond) - begin.
4150     // If the condition constant folds and can be elided, avoid emitting the
4151     // whole loop.
4152     bool CondConstant;
4153     llvm::BasicBlock *ContBlock = nullptr;
4154     OMPLoopScope PreInitScope(CGF, S);
4155     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4156       if (!CondConstant)
4157         return;
4158     } else {
4159       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4160       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4161       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4162                   CGF.getProfileCount(&S));
4163       CGF.EmitBlock(ThenBlock);
4164       CGF.incrementProfileCounter(&S);
4165     }
4166 
4167     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4168       CGF.EmitOMPSimdInit(S);
4169 
4170     OMPPrivateScope LoopScope(CGF);
4171     // Emit helper vars inits.
4172     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4173     auto *I = CS->getCapturedDecl()->param_begin();
4174     auto *LBP = std::next(I, LowerBound);
4175     auto *UBP = std::next(I, UpperBound);
4176     auto *STP = std::next(I, Stride);
4177     auto *LIP = std::next(I, LastIter);
4178     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4179              LoopScope);
4180     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4181              LoopScope);
4182     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4183     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4184              LoopScope);
4185     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4186     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4187     (void)LoopScope.Privatize();
4188     // Emit the loop iteration variable.
4189     const Expr *IVExpr = S.getIterationVariable();
4190     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4191     CGF.EmitVarDecl(*IVDecl);
4192     CGF.EmitIgnoredExpr(S.getInit());
4193 
4194     // Emit the iterations count variable.
4195     // If it is not a variable, Sema decided to calculate iterations count on
4196     // each iteration (e.g., it is foldable into a constant).
4197     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4198       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4199       // Emit calculation of the iterations count.
4200       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4201     }
4202 
4203     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4204                          S.getInc(),
4205                          [&S](CodeGenFunction &CGF) {
4206                            CGF.EmitOMPLoopBody(S, JumpDest());
4207                            CGF.EmitStopPoint(&S);
4208                          },
4209                          [](CodeGenFunction &) {});
4210     // Emit: if (PreCond) - end.
4211     if (ContBlock) {
4212       CGF.EmitBranch(ContBlock);
4213       CGF.EmitBlock(ContBlock, true);
4214     }
4215     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4216     if (HasLastprivateClause) {
4217       CGF.EmitOMPLastprivateClauseFinal(
4218           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4219           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4220               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4221               (*LIP)->getType(), S.getLocStart())));
4222     }
4223   };
4224   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4225                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4226                             const OMPTaskDataTy &Data) {
4227     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4228       OMPLoopScope PreInitScope(CGF, S);
4229       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4230                                                   OutlinedFn, SharedsTy,
4231                                                   CapturedStruct, IfCond, Data);
4232     };
4233     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4234                                                     CodeGen);
4235   };
4236   if (Data.Nogroup)
4237     EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4238   else {
4239     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4240         *this,
4241         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4242                                         PrePostActionTy &Action) {
4243           Action.Enter(CGF);
4244           CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4245         },
4246         S.getLocStart());
4247   }
4248 }
4249 
4250 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4251   EmitOMPTaskLoopBasedDirective(S);
4252 }
4253 
4254 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4255     const OMPTaskLoopSimdDirective &S) {
4256   EmitOMPTaskLoopBasedDirective(S);
4257 }
4258 
4259 // Generate the instructions for '#pragma omp target update' directive.
4260 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4261     const OMPTargetUpdateDirective &S) {
4262   // If we don't have target devices, don't bother emitting the data mapping
4263   // code.
4264   if (CGM.getLangOpts().OMPTargetTriples.empty())
4265     return;
4266 
4267   // Check if we have any if clause associated with the directive.
4268   const Expr *IfCond = nullptr;
4269   if (auto *C = S.getSingleClause<OMPIfClause>())
4270     IfCond = C->getCondition();
4271 
4272   // Check if we have any device clause associated with the directive.
4273   const Expr *Device = nullptr;
4274   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4275     Device = C->getDevice();
4276 
4277   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4278 }
4279