1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
69                             isCapturedVar(CGF, VD) ||
70                                 (CGF.CapturedStmtInfo &&
71                                  InlinedShareds.isGlobalVarCaptured(VD)),
72                             VD->getType().getNonReferenceType(), VK_LValue,
73                             SourceLocation());
74             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
75               return CGF.EmitLValue(&DRE).getAddress();
76             });
77           }
78         }
79         (void)InlinedShareds.Privatize();
80       }
81     }
82   }
83 };
84 
85 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
86 /// for captured expressions.
87 class OMPParallelScope final : public OMPLexicalScope {
88   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
89     OpenMPDirectiveKind Kind = S.getDirectiveKind();
90     return !(isOpenMPTargetExecutionDirective(Kind) ||
91              isOpenMPLoopBoundSharingDirective(Kind)) &&
92            isOpenMPParallelDirective(Kind);
93   }
94 
95 public:
96   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
97       : OMPLexicalScope(CGF, S,
98                         /*AsInlined=*/false,
99                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
100 };
101 
102 /// Lexical scope for OpenMP teams construct, that handles correct codegen
103 /// for captured expressions.
104 class OMPTeamsScope final : public OMPLexicalScope {
105   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
106     OpenMPDirectiveKind Kind = S.getDirectiveKind();
107     return !isOpenMPTargetExecutionDirective(Kind) &&
108            isOpenMPTeamsDirective(Kind);
109   }
110 
111 public:
112   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
113       : OMPLexicalScope(CGF, S,
114                         /*AsInlined=*/false,
115                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
116 };
117 
118 /// Private scope for OpenMP loop-based directives, that supports capturing
119 /// of used expression from loop statement.
120 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
121   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
122     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
123       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
124         for (const auto *I : PreInits->decls())
125           CGF.EmitVarDecl(cast<VarDecl>(*I));
126       }
127     }
128   }
129 
130 public:
131   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
132       : CodeGenFunction::RunCleanupsScope(CGF) {
133     emitPreInitStmt(CGF, S);
134   }
135 };
136 
137 } // namespace
138 
139 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
140   auto &C = getContext();
141   llvm::Value *Size = nullptr;
142   auto SizeInChars = C.getTypeSizeInChars(Ty);
143   if (SizeInChars.isZero()) {
144     // getTypeSizeInChars() returns 0 for a VLA.
145     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
146       llvm::Value *ArraySize;
147       std::tie(ArraySize, Ty) = getVLASize(VAT);
148       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
149     }
150     SizeInChars = C.getTypeSizeInChars(Ty);
151     if (SizeInChars.isZero())
152       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
153     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
154   } else
155     Size = CGM.getSize(SizeInChars);
156   return Size;
157 }
158 
159 void CodeGenFunction::GenerateOpenMPCapturedVars(
160     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
161   const RecordDecl *RD = S.getCapturedRecordDecl();
162   auto CurField = RD->field_begin();
163   auto CurCap = S.captures().begin();
164   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
165                                                  E = S.capture_init_end();
166        I != E; ++I, ++CurField, ++CurCap) {
167     if (CurField->hasCapturedVLAType()) {
168       auto VAT = CurField->getCapturedVLAType();
169       auto *Val = VLASizeMap[VAT->getSizeExpr()];
170       CapturedVars.push_back(Val);
171     } else if (CurCap->capturesThis())
172       CapturedVars.push_back(CXXThisValue);
173     else if (CurCap->capturesVariableByCopy()) {
174       llvm::Value *CV =
175           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
176 
177       // If the field is not a pointer, we need to save the actual value
178       // and load it as a void pointer.
179       if (!CurField->getType()->isAnyPointerType()) {
180         auto &Ctx = getContext();
181         auto DstAddr = CreateMemTemp(
182             Ctx.getUIntPtrType(),
183             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
184         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
185 
186         auto *SrcAddrVal = EmitScalarConversion(
187             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
188             Ctx.getPointerType(CurField->getType()), SourceLocation());
189         LValue SrcLV =
190             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
191 
192         // Store the value using the source type pointer.
193         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
194 
195         // Load the value using the destination type pointer.
196         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
197       }
198       CapturedVars.push_back(CV);
199     } else {
200       assert(CurCap->capturesVariable() && "Expected capture by reference.");
201       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
202     }
203   }
204 }
205 
206 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
207                                     StringRef Name, LValue AddrLV,
208                                     bool isReferenceType = false) {
209   ASTContext &Ctx = CGF.getContext();
210 
211   auto *CastedPtr = CGF.EmitScalarConversion(
212       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
213       Ctx.getPointerType(DstType), SourceLocation());
214   auto TmpAddr =
215       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
216           .getAddress();
217 
218   // If we are dealing with references we need to return the address of the
219   // reference instead of the reference of the value.
220   if (isReferenceType) {
221     QualType RefType = Ctx.getLValueReferenceType(DstType);
222     auto *RefVal = TmpAddr.getPointer();
223     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
224     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
225     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
226   }
227 
228   return TmpAddr;
229 }
230 
231 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
232   if (T->isLValueReferenceType()) {
233     return C.getLValueReferenceType(
234         getCanonicalParamType(C, T.getNonReferenceType()),
235         /*SpelledAsLValue=*/false);
236   }
237   if (T->isPointerType())
238     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
239   return C.getCanonicalParamType(T);
240 }
241 
242 namespace {
243   /// Contains required data for proper outlined function codegen.
244   struct FunctionOptions {
245     /// Captured statement for which the function is generated.
246     const CapturedStmt *S = nullptr;
247     /// true if cast to/from  UIntPtr is required for variables captured by
248     /// value.
249     const bool UIntPtrCastRequired = true;
250     /// true if only casted arguments must be registered as local args or VLA
251     /// sizes.
252     const bool RegisterCastedArgsOnly = false;
253     /// Name of the generated function.
254     const StringRef FunctionName;
255     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
256                              bool RegisterCastedArgsOnly,
257                              StringRef FunctionName)
258         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
259           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
260           FunctionName(FunctionName) {}
261   };
262 }
263 
264 static llvm::Function *emitOutlinedFunctionPrologue(
265     CodeGenFunction &CGF, FunctionArgList &Args,
266     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
267         &LocalAddrs,
268     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
269         &VLASizes,
270     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
271   const CapturedDecl *CD = FO.S->getCapturedDecl();
272   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
273   assert(CD->hasBody() && "missing CapturedDecl body");
274 
275   CXXThisValue = nullptr;
276   // Build the argument list.
277   CodeGenModule &CGM = CGF.CGM;
278   ASTContext &Ctx = CGM.getContext();
279   FunctionArgList TargetArgs;
280   Args.append(CD->param_begin(),
281               std::next(CD->param_begin(), CD->getContextParamPosition()));
282   TargetArgs.append(
283       CD->param_begin(),
284       std::next(CD->param_begin(), CD->getContextParamPosition()));
285   auto I = FO.S->captures().begin();
286   for (auto *FD : RD->fields()) {
287     QualType ArgType = FD->getType();
288     IdentifierInfo *II = nullptr;
289     VarDecl *CapVar = nullptr;
290 
291     // If this is a capture by copy and the type is not a pointer, the outlined
292     // function argument type should be uintptr and the value properly casted to
293     // uintptr. This is necessary given that the runtime library is only able to
294     // deal with pointers. We can pass in the same way the VLA type sizes to the
295     // outlined function.
296     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
297         I->capturesVariableArrayType()) {
298       if (FO.UIntPtrCastRequired)
299         ArgType = Ctx.getUIntPtrType();
300     }
301 
302     if (I->capturesVariable() || I->capturesVariableByCopy()) {
303       CapVar = I->getCapturedVar();
304       II = CapVar->getIdentifier();
305     } else if (I->capturesThis())
306       II = &Ctx.Idents.get("this");
307     else {
308       assert(I->capturesVariableArrayType());
309       II = &Ctx.Idents.get("vla");
310     }
311     if (ArgType->isVariablyModifiedType())
312       ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
313     auto *Arg =
314         ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
315                                   ArgType, ImplicitParamDecl::Other);
316     Args.emplace_back(Arg);
317     // Do not cast arguments if we emit function with non-original types.
318     TargetArgs.emplace_back(
319         FO.UIntPtrCastRequired
320             ? Arg
321             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
322     ++I;
323   }
324   Args.append(
325       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
326       CD->param_end());
327   TargetArgs.append(
328       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
329       CD->param_end());
330 
331   // Create the function declaration.
332   FunctionType::ExtInfo ExtInfo;
333   const CGFunctionInfo &FuncInfo =
334       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
335   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
336 
337   llvm::Function *F =
338       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
339                              FO.FunctionName, &CGM.getModule());
340   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
341   if (CD->isNothrow())
342     F->setDoesNotThrow();
343 
344   // Generate the function.
345   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, CD->getLocation(),
346                     CD->getBody()->getLocStart());
347   unsigned Cnt = CD->getContextParamPosition();
348   I = FO.S->captures().begin();
349   for (auto *FD : RD->fields()) {
350     // Do not map arguments if we emit function with non-original types.
351     Address LocalAddr(Address::invalid());
352     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
353       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
354                                                              TargetArgs[Cnt]);
355     } else {
356       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
357     }
358     // If we are capturing a pointer by copy we don't need to do anything, just
359     // use the value that we get from the arguments.
360     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
361       const VarDecl *CurVD = I->getCapturedVar();
362       // If the variable is a reference we need to materialize it here.
363       if (CurVD->getType()->isReferenceType()) {
364         Address RefAddr = CGF.CreateMemTemp(
365             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
366         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
367                               /*Volatile=*/false, CurVD->getType());
368         LocalAddr = RefAddr;
369       }
370       if (!FO.RegisterCastedArgsOnly)
371         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
372       ++Cnt;
373       ++I;
374       continue;
375     }
376 
377     LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
378     LValue ArgLVal =
379         CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo);
380     if (FD->hasCapturedVLAType()) {
381       if (FO.UIntPtrCastRequired) {
382         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
383                                                           Args[Cnt]->getName(),
384                                                           ArgLVal),
385                                      FD->getType(), BaseInfo);
386       }
387       auto *ExprArg =
388           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
389       auto VAT = FD->getCapturedVLAType();
390       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
391     } else if (I->capturesVariable()) {
392       auto *Var = I->getCapturedVar();
393       QualType VarTy = Var->getType();
394       Address ArgAddr = ArgLVal.getAddress();
395       if (!VarTy->isReferenceType()) {
396         if (ArgLVal.getType()->isLValueReferenceType()) {
397           ArgAddr = CGF.EmitLoadOfReference(
398               ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
399         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
400           assert(ArgLVal.getType()->isPointerType());
401           ArgAddr = CGF.EmitLoadOfPointer(
402               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
403         }
404       }
405       if (!FO.RegisterCastedArgsOnly) {
406         LocalAddrs.insert(
407             {Args[Cnt],
408              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
409       }
410     } else if (I->capturesVariableByCopy()) {
411       assert(!FD->getType()->isAnyPointerType() &&
412              "Not expecting a captured pointer.");
413       auto *Var = I->getCapturedVar();
414       QualType VarTy = Var->getType();
415       LocalAddrs.insert(
416           {Args[Cnt],
417            {Var,
418             FO.UIntPtrCastRequired
419                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
420                                        ArgLVal, VarTy->isReferenceType())
421                 : ArgLVal.getAddress()}});
422     } else {
423       // If 'this' is captured, load it into CXXThisValue.
424       assert(I->capturesThis());
425       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
426                          .getScalarVal();
427       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
428     }
429     ++Cnt;
430     ++I;
431   }
432 
433   return F;
434 }
435 
436 llvm::Function *
437 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
438   assert(
439       CapturedStmtInfo &&
440       "CapturedStmtInfo should be set when generating the captured function");
441   const CapturedDecl *CD = S.getCapturedDecl();
442   // Build the argument list.
443   bool NeedWrapperFunction =
444       getDebugInfo() &&
445       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
446   FunctionArgList Args;
447   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
448   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
449   SmallString<256> Buffer;
450   llvm::raw_svector_ostream Out(Buffer);
451   Out << CapturedStmtInfo->getHelperName();
452   if (NeedWrapperFunction)
453     Out << "_debug__";
454   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
455                      Out.str());
456   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
457                                                    VLASizes, CXXThisValue, FO);
458   for (const auto &LocalAddrPair : LocalAddrs) {
459     if (LocalAddrPair.second.first) {
460       setAddrOfLocalVar(LocalAddrPair.second.first,
461                         LocalAddrPair.second.second);
462     }
463   }
464   for (const auto &VLASizePair : VLASizes)
465     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
466   PGO.assignRegionCounters(GlobalDecl(CD), F);
467   CapturedStmtInfo->EmitBody(*this, CD->getBody());
468   FinishFunction(CD->getBodyRBrace());
469   if (!NeedWrapperFunction)
470     return F;
471 
472   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
473                             /*RegisterCastedArgsOnly=*/true,
474                             CapturedStmtInfo->getHelperName());
475   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
476   WrapperCGF.disableDebugInfo();
477   Args.clear();
478   LocalAddrs.clear();
479   VLASizes.clear();
480   llvm::Function *WrapperF =
481       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
482                                    WrapperCGF.CXXThisValue, WrapperFO);
483   LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
484   llvm::SmallVector<llvm::Value *, 4> CallArgs;
485   for (const auto *Arg : Args) {
486     llvm::Value *CallArg;
487     auto I = LocalAddrs.find(Arg);
488     if (I != LocalAddrs.end()) {
489       LValue LV =
490           WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
491       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
492     } else {
493       auto EI = VLASizes.find(Arg);
494       if (EI != VLASizes.end())
495         CallArg = EI->second.second;
496       else {
497         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
498                                               Arg->getType(), BaseInfo);
499         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
500       }
501     }
502     CallArgs.emplace_back(CallArg);
503   }
504   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, F, CallArgs);
505   WrapperCGF.FinishFunction();
506   return WrapperF;
507 }
508 
509 //===----------------------------------------------------------------------===//
510 //                              OpenMP Directive Emission
511 //===----------------------------------------------------------------------===//
512 void CodeGenFunction::EmitOMPAggregateAssign(
513     Address DestAddr, Address SrcAddr, QualType OriginalType,
514     const llvm::function_ref<void(Address, Address)> &CopyGen) {
515   // Perform element-by-element initialization.
516   QualType ElementTy;
517 
518   // Drill down to the base element type on both arrays.
519   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
520   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
521   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
522 
523   auto SrcBegin = SrcAddr.getPointer();
524   auto DestBegin = DestAddr.getPointer();
525   // Cast from pointer to array type to pointer to single element.
526   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
527   // The basic structure here is a while-do loop.
528   auto BodyBB = createBasicBlock("omp.arraycpy.body");
529   auto DoneBB = createBasicBlock("omp.arraycpy.done");
530   auto IsEmpty =
531       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
532   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
533 
534   // Enter the loop body, making that address the current address.
535   auto EntryBB = Builder.GetInsertBlock();
536   EmitBlock(BodyBB);
537 
538   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
539 
540   llvm::PHINode *SrcElementPHI =
541     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
542   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
543   Address SrcElementCurrent =
544       Address(SrcElementPHI,
545               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
546 
547   llvm::PHINode *DestElementPHI =
548     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
549   DestElementPHI->addIncoming(DestBegin, EntryBB);
550   Address DestElementCurrent =
551     Address(DestElementPHI,
552             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
553 
554   // Emit copy.
555   CopyGen(DestElementCurrent, SrcElementCurrent);
556 
557   // Shift the address forward by one element.
558   auto DestElementNext = Builder.CreateConstGEP1_32(
559       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
560   auto SrcElementNext = Builder.CreateConstGEP1_32(
561       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
562   // Check whether we've reached the end.
563   auto Done =
564       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
565   Builder.CreateCondBr(Done, DoneBB, BodyBB);
566   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
567   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
568 
569   // Done.
570   EmitBlock(DoneBB, /*IsFinished=*/true);
571 }
572 
573 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
574                                   Address SrcAddr, const VarDecl *DestVD,
575                                   const VarDecl *SrcVD, const Expr *Copy) {
576   if (OriginalType->isArrayType()) {
577     auto *BO = dyn_cast<BinaryOperator>(Copy);
578     if (BO && BO->getOpcode() == BO_Assign) {
579       // Perform simple memcpy for simple copying.
580       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
581     } else {
582       // For arrays with complex element types perform element by element
583       // copying.
584       EmitOMPAggregateAssign(
585           DestAddr, SrcAddr, OriginalType,
586           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
587             // Working with the single array element, so have to remap
588             // destination and source variables to corresponding array
589             // elements.
590             CodeGenFunction::OMPPrivateScope Remap(*this);
591             Remap.addPrivate(DestVD, [DestElement]() -> Address {
592               return DestElement;
593             });
594             Remap.addPrivate(
595                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
596             (void)Remap.Privatize();
597             EmitIgnoredExpr(Copy);
598           });
599     }
600   } else {
601     // Remap pseudo source variable to private copy.
602     CodeGenFunction::OMPPrivateScope Remap(*this);
603     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
604     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
605     (void)Remap.Privatize();
606     // Emit copying of the whole variable.
607     EmitIgnoredExpr(Copy);
608   }
609 }
610 
611 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
612                                                 OMPPrivateScope &PrivateScope) {
613   if (!HaveInsertPoint())
614     return false;
615   bool FirstprivateIsLastprivate = false;
616   llvm::DenseSet<const VarDecl *> Lastprivates;
617   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
618     for (const auto *D : C->varlists())
619       Lastprivates.insert(
620           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
621   }
622   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
623   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
624   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
625     auto IRef = C->varlist_begin();
626     auto InitsRef = C->inits().begin();
627     for (auto IInit : C->private_copies()) {
628       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
629       bool ThisFirstprivateIsLastprivate =
630           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
631       auto *CapFD = CapturesInfo.lookup(OrigVD);
632       auto *FD = CapturedStmtInfo->lookup(OrigVD);
633       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
634           !FD->getType()->isReferenceType()) {
635         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
636         ++IRef;
637         ++InitsRef;
638         continue;
639       }
640       FirstprivateIsLastprivate =
641           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
642       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
643         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
644         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
645         bool IsRegistered;
646         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
647                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
648                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
649         Address OriginalAddr = EmitLValue(&DRE).getAddress();
650         QualType Type = VD->getType();
651         if (Type->isArrayType()) {
652           // Emit VarDecl with copy init for arrays.
653           // Get the address of the original variable captured in current
654           // captured region.
655           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
656             auto Emission = EmitAutoVarAlloca(*VD);
657             auto *Init = VD->getInit();
658             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
659               // Perform simple memcpy.
660               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
661                                   Type);
662             } else {
663               EmitOMPAggregateAssign(
664                   Emission.getAllocatedAddress(), OriginalAddr, Type,
665                   [this, VDInit, Init](Address DestElement,
666                                        Address SrcElement) {
667                     // Clean up any temporaries needed by the initialization.
668                     RunCleanupsScope InitScope(*this);
669                     // Emit initialization for single element.
670                     setAddrOfLocalVar(VDInit, SrcElement);
671                     EmitAnyExprToMem(Init, DestElement,
672                                      Init->getType().getQualifiers(),
673                                      /*IsInitializer*/ false);
674                     LocalDeclMap.erase(VDInit);
675                   });
676             }
677             EmitAutoVarCleanups(Emission);
678             return Emission.getAllocatedAddress();
679           });
680         } else {
681           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
682             // Emit private VarDecl with copy init.
683             // Remap temp VDInit variable to the address of the original
684             // variable
685             // (for proper handling of captured global variables).
686             setAddrOfLocalVar(VDInit, OriginalAddr);
687             EmitDecl(*VD);
688             LocalDeclMap.erase(VDInit);
689             return GetAddrOfLocalVar(VD);
690           });
691         }
692         assert(IsRegistered &&
693                "firstprivate var already registered as private");
694         // Silence the warning about unused variable.
695         (void)IsRegistered;
696       }
697       ++IRef;
698       ++InitsRef;
699     }
700   }
701   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
702 }
703 
704 void CodeGenFunction::EmitOMPPrivateClause(
705     const OMPExecutableDirective &D,
706     CodeGenFunction::OMPPrivateScope &PrivateScope) {
707   if (!HaveInsertPoint())
708     return;
709   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
710   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
711     auto IRef = C->varlist_begin();
712     for (auto IInit : C->private_copies()) {
713       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
714       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
715         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
716         bool IsRegistered =
717             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
718               // Emit private VarDecl with copy init.
719               EmitDecl(*VD);
720               return GetAddrOfLocalVar(VD);
721             });
722         assert(IsRegistered && "private var already registered as private");
723         // Silence the warning about unused variable.
724         (void)IsRegistered;
725       }
726       ++IRef;
727     }
728   }
729 }
730 
731 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
732   if (!HaveInsertPoint())
733     return false;
734   // threadprivate_var1 = master_threadprivate_var1;
735   // operator=(threadprivate_var2, master_threadprivate_var2);
736   // ...
737   // __kmpc_barrier(&loc, global_tid);
738   llvm::DenseSet<const VarDecl *> CopiedVars;
739   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
740   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
741     auto IRef = C->varlist_begin();
742     auto ISrcRef = C->source_exprs().begin();
743     auto IDestRef = C->destination_exprs().begin();
744     for (auto *AssignOp : C->assignment_ops()) {
745       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
746       QualType Type = VD->getType();
747       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
748         // Get the address of the master variable. If we are emitting code with
749         // TLS support, the address is passed from the master as field in the
750         // captured declaration.
751         Address MasterAddr = Address::invalid();
752         if (getLangOpts().OpenMPUseTLS &&
753             getContext().getTargetInfo().isTLSSupported()) {
754           assert(CapturedStmtInfo->lookup(VD) &&
755                  "Copyin threadprivates should have been captured!");
756           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
757                           VK_LValue, (*IRef)->getExprLoc());
758           MasterAddr = EmitLValue(&DRE).getAddress();
759           LocalDeclMap.erase(VD);
760         } else {
761           MasterAddr =
762             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
763                                         : CGM.GetAddrOfGlobal(VD),
764                     getContext().getDeclAlign(VD));
765         }
766         // Get the address of the threadprivate variable.
767         Address PrivateAddr = EmitLValue(*IRef).getAddress();
768         if (CopiedVars.size() == 1) {
769           // At first check if current thread is a master thread. If it is, no
770           // need to copy data.
771           CopyBegin = createBasicBlock("copyin.not.master");
772           CopyEnd = createBasicBlock("copyin.not.master.end");
773           Builder.CreateCondBr(
774               Builder.CreateICmpNE(
775                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
776                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
777               CopyBegin, CopyEnd);
778           EmitBlock(CopyBegin);
779         }
780         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
781         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
782         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
783       }
784       ++IRef;
785       ++ISrcRef;
786       ++IDestRef;
787     }
788   }
789   if (CopyEnd) {
790     // Exit out of copying procedure for non-master thread.
791     EmitBlock(CopyEnd, /*IsFinished=*/true);
792     return true;
793   }
794   return false;
795 }
796 
797 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
798     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
799   if (!HaveInsertPoint())
800     return false;
801   bool HasAtLeastOneLastprivate = false;
802   llvm::DenseSet<const VarDecl *> SIMDLCVs;
803   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
804     auto *LoopDirective = cast<OMPLoopDirective>(&D);
805     for (auto *C : LoopDirective->counters()) {
806       SIMDLCVs.insert(
807           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
808     }
809   }
810   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
811   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
812     HasAtLeastOneLastprivate = true;
813     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
814       break;
815     auto IRef = C->varlist_begin();
816     auto IDestRef = C->destination_exprs().begin();
817     for (auto *IInit : C->private_copies()) {
818       // Keep the address of the original variable for future update at the end
819       // of the loop.
820       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
821       // Taskloops do not require additional initialization, it is done in
822       // runtime support library.
823       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
824         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
825         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
826           DeclRefExpr DRE(
827               const_cast<VarDecl *>(OrigVD),
828               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
829                   OrigVD) != nullptr,
830               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
831           return EmitLValue(&DRE).getAddress();
832         });
833         // Check if the variable is also a firstprivate: in this case IInit is
834         // not generated. Initialization of this variable will happen in codegen
835         // for 'firstprivate' clause.
836         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
837           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
838           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
839             // Emit private VarDecl with copy init.
840             EmitDecl(*VD);
841             return GetAddrOfLocalVar(VD);
842           });
843           assert(IsRegistered &&
844                  "lastprivate var already registered as private");
845           (void)IsRegistered;
846         }
847       }
848       ++IRef;
849       ++IDestRef;
850     }
851   }
852   return HasAtLeastOneLastprivate;
853 }
854 
855 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
856     const OMPExecutableDirective &D, bool NoFinals,
857     llvm::Value *IsLastIterCond) {
858   if (!HaveInsertPoint())
859     return;
860   // Emit following code:
861   // if (<IsLastIterCond>) {
862   //   orig_var1 = private_orig_var1;
863   //   ...
864   //   orig_varn = private_orig_varn;
865   // }
866   llvm::BasicBlock *ThenBB = nullptr;
867   llvm::BasicBlock *DoneBB = nullptr;
868   if (IsLastIterCond) {
869     ThenBB = createBasicBlock(".omp.lastprivate.then");
870     DoneBB = createBasicBlock(".omp.lastprivate.done");
871     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
872     EmitBlock(ThenBB);
873   }
874   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
875   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
876   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
877     auto IC = LoopDirective->counters().begin();
878     for (auto F : LoopDirective->finals()) {
879       auto *D =
880           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
881       if (NoFinals)
882         AlreadyEmittedVars.insert(D);
883       else
884         LoopCountersAndUpdates[D] = F;
885       ++IC;
886     }
887   }
888   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
889     auto IRef = C->varlist_begin();
890     auto ISrcRef = C->source_exprs().begin();
891     auto IDestRef = C->destination_exprs().begin();
892     for (auto *AssignOp : C->assignment_ops()) {
893       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
894       QualType Type = PrivateVD->getType();
895       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
896       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
897         // If lastprivate variable is a loop control variable for loop-based
898         // directive, update its value before copyin back to original
899         // variable.
900         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
901           EmitIgnoredExpr(FinalExpr);
902         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
903         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
904         // Get the address of the original variable.
905         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
906         // Get the address of the private variable.
907         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
908         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
909           PrivateAddr =
910               Address(Builder.CreateLoad(PrivateAddr),
911                       getNaturalTypeAlignment(RefTy->getPointeeType()));
912         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
913       }
914       ++IRef;
915       ++ISrcRef;
916       ++IDestRef;
917     }
918     if (auto *PostUpdate = C->getPostUpdateExpr())
919       EmitIgnoredExpr(PostUpdate);
920   }
921   if (IsLastIterCond)
922     EmitBlock(DoneBB, /*IsFinished=*/true);
923 }
924 
925 void CodeGenFunction::EmitOMPReductionClauseInit(
926     const OMPExecutableDirective &D,
927     CodeGenFunction::OMPPrivateScope &PrivateScope) {
928   if (!HaveInsertPoint())
929     return;
930   SmallVector<const Expr *, 4> Shareds;
931   SmallVector<const Expr *, 4> Privates;
932   SmallVector<const Expr *, 4> ReductionOps;
933   SmallVector<const Expr *, 4> LHSs;
934   SmallVector<const Expr *, 4> RHSs;
935   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
936     auto IPriv = C->privates().begin();
937     auto IRed = C->reduction_ops().begin();
938     auto ILHS = C->lhs_exprs().begin();
939     auto IRHS = C->rhs_exprs().begin();
940     for (const auto *Ref : C->varlists()) {
941       Shareds.emplace_back(Ref);
942       Privates.emplace_back(*IPriv);
943       ReductionOps.emplace_back(*IRed);
944       LHSs.emplace_back(*ILHS);
945       RHSs.emplace_back(*IRHS);
946       std::advance(IPriv, 1);
947       std::advance(IRed, 1);
948       std::advance(ILHS, 1);
949       std::advance(IRHS, 1);
950     }
951   }
952   ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
953   unsigned Count = 0;
954   auto ILHS = LHSs.begin();
955   auto IRHS = RHSs.begin();
956   auto IPriv = Privates.begin();
957   for (const auto *IRef : Shareds) {
958     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
959     // Emit private VarDecl with reduction init.
960     RedCG.emitSharedLValue(*this, Count);
961     RedCG.emitAggregateType(*this, Count);
962     auto Emission = EmitAutoVarAlloca(*PrivateVD);
963     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
964                              RedCG.getSharedLValue(Count),
965                              [&Emission](CodeGenFunction &CGF) {
966                                CGF.EmitAutoVarInit(Emission);
967                                return true;
968                              });
969     EmitAutoVarCleanups(Emission);
970     Address BaseAddr = RedCG.adjustPrivateAddress(
971         *this, Count, Emission.getAllocatedAddress());
972     bool IsRegistered = PrivateScope.addPrivate(
973         RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
974     assert(IsRegistered && "private var already registered as private");
975     // Silence the warning about unused variable.
976     (void)IsRegistered;
977 
978     auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
979     auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
980     if (isa<OMPArraySectionExpr>(IRef)) {
981       // Store the address of the original variable associated with the LHS
982       // implicit variable.
983       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
984         return RedCG.getSharedLValue(Count).getAddress();
985       });
986       PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
987         return GetAddrOfLocalVar(PrivateVD);
988       });
989     } else if (isa<ArraySubscriptExpr>(IRef)) {
990       // Store the address of the original variable associated with the LHS
991       // implicit variable.
992       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
993         return RedCG.getSharedLValue(Count).getAddress();
994       });
995       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
996         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
997                                             ConvertTypeForMem(RHSVD->getType()),
998                                             "rhs.begin");
999       });
1000     } else {
1001       QualType Type = PrivateVD->getType();
1002       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1003       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1004       // Store the address of the original variable associated with the LHS
1005       // implicit variable.
1006       if (IsArray) {
1007         OriginalAddr = Builder.CreateElementBitCast(
1008             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1009       }
1010       PrivateScope.addPrivate(
1011           LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
1012       PrivateScope.addPrivate(
1013           RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
1014             return IsArray
1015                        ? Builder.CreateElementBitCast(
1016                              GetAddrOfLocalVar(PrivateVD),
1017                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1018                        : GetAddrOfLocalVar(PrivateVD);
1019           });
1020     }
1021     ++ILHS;
1022     ++IRHS;
1023     ++IPriv;
1024     ++Count;
1025   }
1026 }
1027 
1028 void CodeGenFunction::EmitOMPReductionClauseFinal(
1029     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1030   if (!HaveInsertPoint())
1031     return;
1032   llvm::SmallVector<const Expr *, 8> Privates;
1033   llvm::SmallVector<const Expr *, 8> LHSExprs;
1034   llvm::SmallVector<const Expr *, 8> RHSExprs;
1035   llvm::SmallVector<const Expr *, 8> ReductionOps;
1036   bool HasAtLeastOneReduction = false;
1037   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1038     HasAtLeastOneReduction = true;
1039     Privates.append(C->privates().begin(), C->privates().end());
1040     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1041     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1042     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1043   }
1044   if (HasAtLeastOneReduction) {
1045     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1046                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1047                       D.getDirectiveKind() == OMPD_simd;
1048     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1049     // Emit nowait reduction if nowait clause is present or directive is a
1050     // parallel directive (it always has implicit barrier).
1051     CGM.getOpenMPRuntime().emitReduction(
1052         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1053         {WithNowait, SimpleReduction, ReductionKind});
1054   }
1055 }
1056 
1057 static void emitPostUpdateForReductionClause(
1058     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1059     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1060   if (!CGF.HaveInsertPoint())
1061     return;
1062   llvm::BasicBlock *DoneBB = nullptr;
1063   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1064     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1065       if (!DoneBB) {
1066         if (auto *Cond = CondGen(CGF)) {
1067           // If the first post-update expression is found, emit conditional
1068           // block if it was requested.
1069           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1070           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1071           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1072           CGF.EmitBlock(ThenBB);
1073         }
1074       }
1075       CGF.EmitIgnoredExpr(PostUpdate);
1076     }
1077   }
1078   if (DoneBB)
1079     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1080 }
1081 
1082 namespace {
1083 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1084 /// parallel function. This is necessary for combined constructs such as
1085 /// 'distribute parallel for'
1086 typedef llvm::function_ref<void(CodeGenFunction &,
1087                                 const OMPExecutableDirective &,
1088                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1089     CodeGenBoundParametersTy;
1090 } // anonymous namespace
1091 
1092 static void emitCommonOMPParallelDirective(
1093     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1094     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1095     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1096   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1097   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1098       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1099   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1100     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1101     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1102                                          /*IgnoreResultAssign*/ true);
1103     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1104         CGF, NumThreads, NumThreadsClause->getLocStart());
1105   }
1106   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1107     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1108     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1109         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1110   }
1111   const Expr *IfCond = nullptr;
1112   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1113     if (C->getNameModifier() == OMPD_unknown ||
1114         C->getNameModifier() == OMPD_parallel) {
1115       IfCond = C->getCondition();
1116       break;
1117     }
1118   }
1119 
1120   OMPParallelScope Scope(CGF, S);
1121   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1122   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1123   // lower and upper bounds with the pragma 'for' chunking mechanism.
1124   // The following lambda takes care of appending the lower and upper bound
1125   // parameters when necessary
1126   CodeGenBoundParameters(CGF, S, CapturedVars);
1127   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1128   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1129                                               CapturedVars, IfCond);
1130 }
1131 
1132 static void emitEmptyBoundParameters(CodeGenFunction &,
1133                                      const OMPExecutableDirective &,
1134                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1135 
1136 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1137   // Emit parallel region as a standalone region.
1138   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1139     OMPPrivateScope PrivateScope(CGF);
1140     bool Copyins = CGF.EmitOMPCopyinClause(S);
1141     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1142     if (Copyins) {
1143       // Emit implicit barrier to synchronize threads and avoid data races on
1144       // propagation master's thread values of threadprivate variables to local
1145       // instances of that variables of all other implicit threads.
1146       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1147           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1148           /*ForceSimpleCall=*/true);
1149     }
1150     CGF.EmitOMPPrivateClause(S, PrivateScope);
1151     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1152     (void)PrivateScope.Privatize();
1153     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1154     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1155   };
1156   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1157                                  emitEmptyBoundParameters);
1158   emitPostUpdateForReductionClause(
1159       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1160 }
1161 
1162 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1163                                       JumpDest LoopExit) {
1164   RunCleanupsScope BodyScope(*this);
1165   // Update counters values on current iteration.
1166   for (auto I : D.updates()) {
1167     EmitIgnoredExpr(I);
1168   }
1169   // Update the linear variables.
1170   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1171     for (auto *U : C->updates())
1172       EmitIgnoredExpr(U);
1173   }
1174 
1175   // On a continue in the body, jump to the end.
1176   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1177   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1178   // Emit loop body.
1179   EmitStmt(D.getBody());
1180   // The end (updates/cleanups).
1181   EmitBlock(Continue.getBlock());
1182   BreakContinueStack.pop_back();
1183 }
1184 
1185 void CodeGenFunction::EmitOMPInnerLoop(
1186     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1187     const Expr *IncExpr,
1188     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1189     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1190   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1191 
1192   // Start the loop with a block that tests the condition.
1193   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1194   EmitBlock(CondBlock);
1195   const SourceRange &R = S.getSourceRange();
1196   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1197                  SourceLocToDebugLoc(R.getEnd()));
1198 
1199   // If there are any cleanups between here and the loop-exit scope,
1200   // create a block to stage a loop exit along.
1201   auto ExitBlock = LoopExit.getBlock();
1202   if (RequiresCleanup)
1203     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1204 
1205   auto LoopBody = createBasicBlock("omp.inner.for.body");
1206 
1207   // Emit condition.
1208   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1209   if (ExitBlock != LoopExit.getBlock()) {
1210     EmitBlock(ExitBlock);
1211     EmitBranchThroughCleanup(LoopExit);
1212   }
1213 
1214   EmitBlock(LoopBody);
1215   incrementProfileCounter(&S);
1216 
1217   // Create a block for the increment.
1218   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1219   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1220 
1221   BodyGen(*this);
1222 
1223   // Emit "IV = IV + 1" and a back-edge to the condition block.
1224   EmitBlock(Continue.getBlock());
1225   EmitIgnoredExpr(IncExpr);
1226   PostIncGen(*this);
1227   BreakContinueStack.pop_back();
1228   EmitBranch(CondBlock);
1229   LoopStack.pop();
1230   // Emit the fall-through block.
1231   EmitBlock(LoopExit.getBlock());
1232 }
1233 
1234 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1235   if (!HaveInsertPoint())
1236     return;
1237   // Emit inits for the linear variables.
1238   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1239     for (auto *Init : C->inits()) {
1240       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1241       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1242         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1243         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1244         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1245                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1246                         VD->getInit()->getType(), VK_LValue,
1247                         VD->getInit()->getExprLoc());
1248         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1249                                                 VD->getType()),
1250                        /*capturedByInit=*/false);
1251         EmitAutoVarCleanups(Emission);
1252       } else
1253         EmitVarDecl(*VD);
1254     }
1255     // Emit the linear steps for the linear clauses.
1256     // If a step is not constant, it is pre-calculated before the loop.
1257     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1258       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1259         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1260         // Emit calculation of the linear step.
1261         EmitIgnoredExpr(CS);
1262       }
1263   }
1264 }
1265 
1266 void CodeGenFunction::EmitOMPLinearClauseFinal(
1267     const OMPLoopDirective &D,
1268     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1269   if (!HaveInsertPoint())
1270     return;
1271   llvm::BasicBlock *DoneBB = nullptr;
1272   // Emit the final values of the linear variables.
1273   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1274     auto IC = C->varlist_begin();
1275     for (auto *F : C->finals()) {
1276       if (!DoneBB) {
1277         if (auto *Cond = CondGen(*this)) {
1278           // If the first post-update expression is found, emit conditional
1279           // block if it was requested.
1280           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1281           DoneBB = createBasicBlock(".omp.linear.pu.done");
1282           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1283           EmitBlock(ThenBB);
1284         }
1285       }
1286       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1287       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1288                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1289                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1290       Address OrigAddr = EmitLValue(&DRE).getAddress();
1291       CodeGenFunction::OMPPrivateScope VarScope(*this);
1292       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1293       (void)VarScope.Privatize();
1294       EmitIgnoredExpr(F);
1295       ++IC;
1296     }
1297     if (auto *PostUpdate = C->getPostUpdateExpr())
1298       EmitIgnoredExpr(PostUpdate);
1299   }
1300   if (DoneBB)
1301     EmitBlock(DoneBB, /*IsFinished=*/true);
1302 }
1303 
1304 static void emitAlignedClause(CodeGenFunction &CGF,
1305                               const OMPExecutableDirective &D) {
1306   if (!CGF.HaveInsertPoint())
1307     return;
1308   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1309     unsigned ClauseAlignment = 0;
1310     if (auto AlignmentExpr = Clause->getAlignment()) {
1311       auto AlignmentCI =
1312           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1313       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1314     }
1315     for (auto E : Clause->varlists()) {
1316       unsigned Alignment = ClauseAlignment;
1317       if (Alignment == 0) {
1318         // OpenMP [2.8.1, Description]
1319         // If no optional parameter is specified, implementation-defined default
1320         // alignments for SIMD instructions on the target platforms are assumed.
1321         Alignment =
1322             CGF.getContext()
1323                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1324                     E->getType()->getPointeeType()))
1325                 .getQuantity();
1326       }
1327       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1328              "alignment is not power of 2");
1329       if (Alignment != 0) {
1330         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1331         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1332       }
1333     }
1334   }
1335 }
1336 
1337 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1338     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1339   if (!HaveInsertPoint())
1340     return;
1341   auto I = S.private_counters().begin();
1342   for (auto *E : S.counters()) {
1343     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1344     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1345     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1346       // Emit var without initialization.
1347       if (!LocalDeclMap.count(PrivateVD)) {
1348         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1349         EmitAutoVarCleanups(VarEmission);
1350       }
1351       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1352                       /*RefersToEnclosingVariableOrCapture=*/false,
1353                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1354       return EmitLValue(&DRE).getAddress();
1355     });
1356     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1357         VD->hasGlobalStorage()) {
1358       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1359         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1360                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1361                         E->getType(), VK_LValue, E->getExprLoc());
1362         return EmitLValue(&DRE).getAddress();
1363       });
1364     }
1365     ++I;
1366   }
1367 }
1368 
1369 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1370                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1371                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1372   if (!CGF.HaveInsertPoint())
1373     return;
1374   {
1375     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1376     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1377     (void)PreCondScope.Privatize();
1378     // Get initial values of real counters.
1379     for (auto I : S.inits()) {
1380       CGF.EmitIgnoredExpr(I);
1381     }
1382   }
1383   // Check that loop is executed at least one time.
1384   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1385 }
1386 
1387 void CodeGenFunction::EmitOMPLinearClause(
1388     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1389   if (!HaveInsertPoint())
1390     return;
1391   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1392   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1393     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1394     for (auto *C : LoopDirective->counters()) {
1395       SIMDLCVs.insert(
1396           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1397     }
1398   }
1399   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1400     auto CurPrivate = C->privates().begin();
1401     for (auto *E : C->varlists()) {
1402       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1403       auto *PrivateVD =
1404           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1405       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1406         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1407           // Emit private VarDecl with copy init.
1408           EmitVarDecl(*PrivateVD);
1409           return GetAddrOfLocalVar(PrivateVD);
1410         });
1411         assert(IsRegistered && "linear var already registered as private");
1412         // Silence the warning about unused variable.
1413         (void)IsRegistered;
1414       } else
1415         EmitVarDecl(*PrivateVD);
1416       ++CurPrivate;
1417     }
1418   }
1419 }
1420 
1421 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1422                                      const OMPExecutableDirective &D,
1423                                      bool IsMonotonic) {
1424   if (!CGF.HaveInsertPoint())
1425     return;
1426   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1427     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1428                                  /*ignoreResult=*/true);
1429     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1430     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1431     // In presence of finite 'safelen', it may be unsafe to mark all
1432     // the memory instructions parallel, because loop-carried
1433     // dependences of 'safelen' iterations are possible.
1434     if (!IsMonotonic)
1435       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1436   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1437     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1438                                  /*ignoreResult=*/true);
1439     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1440     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1441     // In presence of finite 'safelen', it may be unsafe to mark all
1442     // the memory instructions parallel, because loop-carried
1443     // dependences of 'safelen' iterations are possible.
1444     CGF.LoopStack.setParallel(false);
1445   }
1446 }
1447 
1448 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1449                                       bool IsMonotonic) {
1450   // Walk clauses and process safelen/lastprivate.
1451   LoopStack.setParallel(!IsMonotonic);
1452   LoopStack.setVectorizeEnable(true);
1453   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1454 }
1455 
1456 void CodeGenFunction::EmitOMPSimdFinal(
1457     const OMPLoopDirective &D,
1458     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1459   if (!HaveInsertPoint())
1460     return;
1461   llvm::BasicBlock *DoneBB = nullptr;
1462   auto IC = D.counters().begin();
1463   auto IPC = D.private_counters().begin();
1464   for (auto F : D.finals()) {
1465     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1466     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1467     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1468     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1469         OrigVD->hasGlobalStorage() || CED) {
1470       if (!DoneBB) {
1471         if (auto *Cond = CondGen(*this)) {
1472           // If the first post-update expression is found, emit conditional
1473           // block if it was requested.
1474           auto *ThenBB = createBasicBlock(".omp.final.then");
1475           DoneBB = createBasicBlock(".omp.final.done");
1476           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1477           EmitBlock(ThenBB);
1478         }
1479       }
1480       Address OrigAddr = Address::invalid();
1481       if (CED)
1482         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1483       else {
1484         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1485                         /*RefersToEnclosingVariableOrCapture=*/false,
1486                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1487         OrigAddr = EmitLValue(&DRE).getAddress();
1488       }
1489       OMPPrivateScope VarScope(*this);
1490       VarScope.addPrivate(OrigVD,
1491                           [OrigAddr]() -> Address { return OrigAddr; });
1492       (void)VarScope.Privatize();
1493       EmitIgnoredExpr(F);
1494     }
1495     ++IC;
1496     ++IPC;
1497   }
1498   if (DoneBB)
1499     EmitBlock(DoneBB, /*IsFinished=*/true);
1500 }
1501 
1502 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1503                                          const OMPLoopDirective &S,
1504                                          CodeGenFunction::JumpDest LoopExit) {
1505   CGF.EmitOMPLoopBody(S, LoopExit);
1506   CGF.EmitStopPoint(&S);
1507 }
1508 
1509 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1510   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1511     OMPLoopScope PreInitScope(CGF, S);
1512     // if (PreCond) {
1513     //   for (IV in 0..LastIteration) BODY;
1514     //   <Final counter/linear vars updates>;
1515     // }
1516     //
1517 
1518     // Emit: if (PreCond) - begin.
1519     // If the condition constant folds and can be elided, avoid emitting the
1520     // whole loop.
1521     bool CondConstant;
1522     llvm::BasicBlock *ContBlock = nullptr;
1523     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1524       if (!CondConstant)
1525         return;
1526     } else {
1527       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1528       ContBlock = CGF.createBasicBlock("simd.if.end");
1529       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1530                   CGF.getProfileCount(&S));
1531       CGF.EmitBlock(ThenBlock);
1532       CGF.incrementProfileCounter(&S);
1533     }
1534 
1535     // Emit the loop iteration variable.
1536     const Expr *IVExpr = S.getIterationVariable();
1537     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1538     CGF.EmitVarDecl(*IVDecl);
1539     CGF.EmitIgnoredExpr(S.getInit());
1540 
1541     // Emit the iterations count variable.
1542     // If it is not a variable, Sema decided to calculate iterations count on
1543     // each iteration (e.g., it is foldable into a constant).
1544     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1545       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1546       // Emit calculation of the iterations count.
1547       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1548     }
1549 
1550     CGF.EmitOMPSimdInit(S);
1551 
1552     emitAlignedClause(CGF, S);
1553     CGF.EmitOMPLinearClauseInit(S);
1554     {
1555       OMPPrivateScope LoopScope(CGF);
1556       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1557       CGF.EmitOMPLinearClause(S, LoopScope);
1558       CGF.EmitOMPPrivateClause(S, LoopScope);
1559       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1560       bool HasLastprivateClause =
1561           CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1562       (void)LoopScope.Privatize();
1563       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1564                            S.getInc(),
1565                            [&S](CodeGenFunction &CGF) {
1566                              CGF.EmitOMPLoopBody(S, JumpDest());
1567                              CGF.EmitStopPoint(&S);
1568                            },
1569                            [](CodeGenFunction &) {});
1570       CGF.EmitOMPSimdFinal(
1571           S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1572       // Emit final copy of the lastprivate variables at the end of loops.
1573       if (HasLastprivateClause)
1574         CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1575       CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1576       emitPostUpdateForReductionClause(
1577           CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1578     }
1579     CGF.EmitOMPLinearClauseFinal(
1580         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1581     // Emit: if (PreCond) - end.
1582     if (ContBlock) {
1583       CGF.EmitBranch(ContBlock);
1584       CGF.EmitBlock(ContBlock, true);
1585     }
1586   };
1587   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1588   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1589 }
1590 
1591 void CodeGenFunction::EmitOMPOuterLoop(
1592     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1593     CodeGenFunction::OMPPrivateScope &LoopScope,
1594     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1595     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1596     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1597   auto &RT = CGM.getOpenMPRuntime();
1598 
1599   const Expr *IVExpr = S.getIterationVariable();
1600   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1601   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1602 
1603   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1604 
1605   // Start the loop with a block that tests the condition.
1606   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1607   EmitBlock(CondBlock);
1608   const SourceRange &R = S.getSourceRange();
1609   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1610                  SourceLocToDebugLoc(R.getEnd()));
1611 
1612   llvm::Value *BoolCondVal = nullptr;
1613   if (!DynamicOrOrdered) {
1614     // UB = min(UB, GlobalUB) or
1615     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1616     // 'distribute parallel for')
1617     EmitIgnoredExpr(LoopArgs.EUB);
1618     // IV = LB
1619     EmitIgnoredExpr(LoopArgs.Init);
1620     // IV < UB
1621     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1622   } else {
1623     BoolCondVal =
1624         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1625                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1626   }
1627 
1628   // If there are any cleanups between here and the loop-exit scope,
1629   // create a block to stage a loop exit along.
1630   auto ExitBlock = LoopExit.getBlock();
1631   if (LoopScope.requiresCleanups())
1632     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1633 
1634   auto LoopBody = createBasicBlock("omp.dispatch.body");
1635   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1636   if (ExitBlock != LoopExit.getBlock()) {
1637     EmitBlock(ExitBlock);
1638     EmitBranchThroughCleanup(LoopExit);
1639   }
1640   EmitBlock(LoopBody);
1641 
1642   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1643   // LB for loop condition and emitted it above).
1644   if (DynamicOrOrdered)
1645     EmitIgnoredExpr(LoopArgs.Init);
1646 
1647   // Create a block for the increment.
1648   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1649   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1650 
1651   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1652   // with dynamic/guided scheduling and without ordered clause.
1653   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1654     LoopStack.setParallel(!IsMonotonic);
1655   else
1656     EmitOMPSimdInit(S, IsMonotonic);
1657 
1658   SourceLocation Loc = S.getLocStart();
1659 
1660   // when 'distribute' is not combined with a 'for':
1661   // while (idx <= UB) { BODY; ++idx; }
1662   // when 'distribute' is combined with a 'for'
1663   // (e.g. 'distribute parallel for')
1664   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1665   EmitOMPInnerLoop(
1666       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1667       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1668         CodeGenLoop(CGF, S, LoopExit);
1669       },
1670       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1671         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1672       });
1673 
1674   EmitBlock(Continue.getBlock());
1675   BreakContinueStack.pop_back();
1676   if (!DynamicOrOrdered) {
1677     // Emit "LB = LB + Stride", "UB = UB + Stride".
1678     EmitIgnoredExpr(LoopArgs.NextLB);
1679     EmitIgnoredExpr(LoopArgs.NextUB);
1680   }
1681 
1682   EmitBranch(CondBlock);
1683   LoopStack.pop();
1684   // Emit the fall-through block.
1685   EmitBlock(LoopExit.getBlock());
1686 
1687   // Tell the runtime we are done.
1688   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1689     if (!DynamicOrOrdered)
1690       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
1691   };
1692   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1693 }
1694 
1695 void CodeGenFunction::EmitOMPForOuterLoop(
1696     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1697     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1698     const OMPLoopArguments &LoopArgs,
1699     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1700   auto &RT = CGM.getOpenMPRuntime();
1701 
1702   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1703   const bool DynamicOrOrdered =
1704       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1705 
1706   assert((Ordered ||
1707           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1708                                  LoopArgs.Chunk != nullptr)) &&
1709          "static non-chunked schedule does not need outer loop");
1710 
1711   // Emit outer loop.
1712   //
1713   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1714   // When schedule(dynamic,chunk_size) is specified, the iterations are
1715   // distributed to threads in the team in chunks as the threads request them.
1716   // Each thread executes a chunk of iterations, then requests another chunk,
1717   // until no chunks remain to be distributed. Each chunk contains chunk_size
1718   // iterations, except for the last chunk to be distributed, which may have
1719   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1720   //
1721   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1722   // to threads in the team in chunks as the executing threads request them.
1723   // Each thread executes a chunk of iterations, then requests another chunk,
1724   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1725   // each chunk is proportional to the number of unassigned iterations divided
1726   // by the number of threads in the team, decreasing to 1. For a chunk_size
1727   // with value k (greater than 1), the size of each chunk is determined in the
1728   // same way, with the restriction that the chunks do not contain fewer than k
1729   // iterations (except for the last chunk to be assigned, which may have fewer
1730   // than k iterations).
1731   //
1732   // When schedule(auto) is specified, the decision regarding scheduling is
1733   // delegated to the compiler and/or runtime system. The programmer gives the
1734   // implementation the freedom to choose any possible mapping of iterations to
1735   // threads in the team.
1736   //
1737   // When schedule(runtime) is specified, the decision regarding scheduling is
1738   // deferred until run time, and the schedule and chunk size are taken from the
1739   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1740   // implementation defined
1741   //
1742   // while(__kmpc_dispatch_next(&LB, &UB)) {
1743   //   idx = LB;
1744   //   while (idx <= UB) { BODY; ++idx;
1745   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1746   //   } // inner loop
1747   // }
1748   //
1749   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1750   // When schedule(static, chunk_size) is specified, iterations are divided into
1751   // chunks of size chunk_size, and the chunks are assigned to the threads in
1752   // the team in a round-robin fashion in the order of the thread number.
1753   //
1754   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1755   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1756   //   LB = LB + ST;
1757   //   UB = UB + ST;
1758   // }
1759   //
1760 
1761   const Expr *IVExpr = S.getIterationVariable();
1762   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1763   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1764 
1765   if (DynamicOrOrdered) {
1766     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1767     llvm::Value *LBVal = DispatchBounds.first;
1768     llvm::Value *UBVal = DispatchBounds.second;
1769     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1770                                                              LoopArgs.Chunk};
1771     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1772                            IVSigned, Ordered, DipatchRTInputValues);
1773   } else {
1774     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1775                          Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1776                          LoopArgs.ST, LoopArgs.Chunk);
1777   }
1778 
1779   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1780                                     const unsigned IVSize,
1781                                     const bool IVSigned) {
1782     if (Ordered) {
1783       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1784                                                             IVSigned);
1785     }
1786   };
1787 
1788   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1789                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1790   OuterLoopArgs.IncExpr = S.getInc();
1791   OuterLoopArgs.Init = S.getInit();
1792   OuterLoopArgs.Cond = S.getCond();
1793   OuterLoopArgs.NextLB = S.getNextLowerBound();
1794   OuterLoopArgs.NextUB = S.getNextUpperBound();
1795   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1796                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1797 }
1798 
1799 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1800                              const unsigned IVSize, const bool IVSigned) {}
1801 
1802 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1803     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1804     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1805     const CodeGenLoopTy &CodeGenLoopContent) {
1806 
1807   auto &RT = CGM.getOpenMPRuntime();
1808 
1809   // Emit outer loop.
1810   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1811   // dynamic
1812   //
1813 
1814   const Expr *IVExpr = S.getIterationVariable();
1815   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1816   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1817 
1818   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1819                               IVSigned, /* Ordered = */ false, LoopArgs.IL,
1820                               LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1821                               LoopArgs.Chunk);
1822 
1823   // for combined 'distribute' and 'for' the increment expression of distribute
1824   // is store in DistInc. For 'distribute' alone, it is in Inc.
1825   Expr *IncExpr;
1826   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1827     IncExpr = S.getDistInc();
1828   else
1829     IncExpr = S.getInc();
1830 
1831   // this routine is shared by 'omp distribute parallel for' and
1832   // 'omp distribute': select the right EUB expression depending on the
1833   // directive
1834   OMPLoopArguments OuterLoopArgs;
1835   OuterLoopArgs.LB = LoopArgs.LB;
1836   OuterLoopArgs.UB = LoopArgs.UB;
1837   OuterLoopArgs.ST = LoopArgs.ST;
1838   OuterLoopArgs.IL = LoopArgs.IL;
1839   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1840   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1841                           ? S.getCombinedEnsureUpperBound()
1842                           : S.getEnsureUpperBound();
1843   OuterLoopArgs.IncExpr = IncExpr;
1844   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1845                            ? S.getCombinedInit()
1846                            : S.getInit();
1847   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1848                            ? S.getCombinedCond()
1849                            : S.getCond();
1850   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1851                              ? S.getCombinedNextLowerBound()
1852                              : S.getNextLowerBound();
1853   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1854                              ? S.getCombinedNextUpperBound()
1855                              : S.getNextUpperBound();
1856 
1857   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1858                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1859                    emitEmptyOrdered);
1860 }
1861 
1862 /// Emit a helper variable and return corresponding lvalue.
1863 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1864                                const DeclRefExpr *Helper) {
1865   auto VDecl = cast<VarDecl>(Helper->getDecl());
1866   CGF.EmitVarDecl(*VDecl);
1867   return CGF.EmitLValue(Helper);
1868 }
1869 
1870 static std::pair<LValue, LValue>
1871 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1872                                      const OMPExecutableDirective &S) {
1873   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1874   LValue LB =
1875       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1876   LValue UB =
1877       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1878 
1879   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1880   // parallel for') we need to use the 'distribute'
1881   // chunk lower and upper bounds rather than the whole loop iteration
1882   // space. These are parameters to the outlined function for 'parallel'
1883   // and we copy the bounds of the previous schedule into the
1884   // the current ones.
1885   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1886   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1887   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1888   PrevLBVal = CGF.EmitScalarConversion(
1889       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1890       LS.getIterationVariable()->getType(), SourceLocation());
1891   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1892   PrevUBVal = CGF.EmitScalarConversion(
1893       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1894       LS.getIterationVariable()->getType(), SourceLocation());
1895 
1896   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1897   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1898 
1899   return {LB, UB};
1900 }
1901 
1902 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1903 /// we need to use the LB and UB expressions generated by the worksharing
1904 /// code generation support, whereas in non combined situations we would
1905 /// just emit 0 and the LastIteration expression
1906 /// This function is necessary due to the difference of the LB and UB
1907 /// types for the RT emission routines for 'for_static_init' and
1908 /// 'for_dispatch_init'
1909 static std::pair<llvm::Value *, llvm::Value *>
1910 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1911                                         const OMPExecutableDirective &S,
1912                                         Address LB, Address UB) {
1913   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1914   const Expr *IVExpr = LS.getIterationVariable();
1915   // when implementing a dynamic schedule for a 'for' combined with a
1916   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1917   // is not normalized as each team only executes its own assigned
1918   // distribute chunk
1919   QualType IteratorTy = IVExpr->getType();
1920   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1921                                             SourceLocation());
1922   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1923                                             SourceLocation());
1924   return {LBVal, UBVal};
1925 }
1926 
1927 static void emitDistributeParallelForDistributeInnerBoundParams(
1928     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1929     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1930   const auto &Dir = cast<OMPLoopDirective>(S);
1931   LValue LB =
1932       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1933   auto LBCast = CGF.Builder.CreateIntCast(
1934       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1935   CapturedVars.push_back(LBCast);
1936   LValue UB =
1937       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1938 
1939   auto UBCast = CGF.Builder.CreateIntCast(
1940       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1941   CapturedVars.push_back(UBCast);
1942 }
1943 
1944 static void
1945 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
1946                                  const OMPLoopDirective &S,
1947                                  CodeGenFunction::JumpDest LoopExit) {
1948   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
1949                                          PrePostActionTy &) {
1950     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
1951                                emitDistributeParallelForInnerBounds,
1952                                emitDistributeParallelForDispatchBounds);
1953   };
1954 
1955   emitCommonOMPParallelDirective(
1956       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
1957       emitDistributeParallelForDistributeInnerBoundParams);
1958 }
1959 
1960 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1961     const OMPDistributeParallelForDirective &S) {
1962   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1963     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
1964                               S.getDistInc());
1965   };
1966   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1967   OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
1968                                   /*HasCancel=*/false);
1969   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
1970                                               /*HasCancel=*/false);
1971 }
1972 
1973 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
1974     const OMPDistributeParallelForSimdDirective &S) {
1975   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1976   CGM.getOpenMPRuntime().emitInlinedDirective(
1977       *this, OMPD_distribute_parallel_for_simd,
1978       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1979         OMPLoopScope PreInitScope(CGF, S);
1980         CGF.EmitStmt(
1981             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1982       });
1983 }
1984 
1985 void CodeGenFunction::EmitOMPDistributeSimdDirective(
1986     const OMPDistributeSimdDirective &S) {
1987   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1988   CGM.getOpenMPRuntime().emitInlinedDirective(
1989       *this, OMPD_distribute_simd,
1990       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1991         OMPLoopScope PreInitScope(CGF, S);
1992         CGF.EmitStmt(
1993             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1994       });
1995 }
1996 
1997 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
1998     const OMPTargetParallelForSimdDirective &S) {
1999   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2000   CGM.getOpenMPRuntime().emitInlinedDirective(
2001       *this, OMPD_target_parallel_for_simd,
2002       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2003         OMPLoopScope PreInitScope(CGF, S);
2004         CGF.EmitStmt(
2005             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2006       });
2007 }
2008 
2009 void CodeGenFunction::EmitOMPTargetSimdDirective(
2010     const OMPTargetSimdDirective &S) {
2011   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2012   CGM.getOpenMPRuntime().emitInlinedDirective(
2013       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2014         OMPLoopScope PreInitScope(CGF, S);
2015         CGF.EmitStmt(
2016             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2017       });
2018 }
2019 
2020 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
2021     const OMPTeamsDistributeDirective &S) {
2022   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2023   CGM.getOpenMPRuntime().emitInlinedDirective(
2024       *this, OMPD_teams_distribute,
2025       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2026         OMPLoopScope PreInitScope(CGF, S);
2027         CGF.EmitStmt(
2028             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2029       });
2030 }
2031 
2032 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2033     const OMPTeamsDistributeSimdDirective &S) {
2034   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2035   CGM.getOpenMPRuntime().emitInlinedDirective(
2036       *this, OMPD_teams_distribute_simd,
2037       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2038         OMPLoopScope PreInitScope(CGF, S);
2039         CGF.EmitStmt(
2040             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2041       });
2042 }
2043 
2044 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2045     const OMPTeamsDistributeParallelForSimdDirective &S) {
2046   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2047   CGM.getOpenMPRuntime().emitInlinedDirective(
2048       *this, OMPD_teams_distribute_parallel_for_simd,
2049       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2050         OMPLoopScope PreInitScope(CGF, S);
2051         CGF.EmitStmt(
2052             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2053       });
2054 }
2055 
2056 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2057     const OMPTeamsDistributeParallelForDirective &S) {
2058   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2059   CGM.getOpenMPRuntime().emitInlinedDirective(
2060       *this, OMPD_teams_distribute_parallel_for,
2061       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2062         OMPLoopScope PreInitScope(CGF, S);
2063         CGF.EmitStmt(
2064             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2065       });
2066 }
2067 
2068 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2069     const OMPTargetTeamsDistributeDirective &S) {
2070   CGM.getOpenMPRuntime().emitInlinedDirective(
2071       *this, OMPD_target_teams_distribute,
2072       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2073         CGF.EmitStmt(
2074             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2075       });
2076 }
2077 
2078 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2079     const OMPTargetTeamsDistributeParallelForDirective &S) {
2080   CGM.getOpenMPRuntime().emitInlinedDirective(
2081       *this, OMPD_target_teams_distribute_parallel_for,
2082       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2083         CGF.EmitStmt(
2084             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2085       });
2086 }
2087 
2088 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2089     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2090   CGM.getOpenMPRuntime().emitInlinedDirective(
2091       *this, OMPD_target_teams_distribute_parallel_for_simd,
2092       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2093         CGF.EmitStmt(
2094             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2095       });
2096 }
2097 
2098 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2099     const OMPTargetTeamsDistributeSimdDirective &S) {
2100   CGM.getOpenMPRuntime().emitInlinedDirective(
2101       *this, OMPD_target_teams_distribute_simd,
2102       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2103         CGF.EmitStmt(
2104             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2105       });
2106 }
2107 
2108 namespace {
2109   struct ScheduleKindModifiersTy {
2110     OpenMPScheduleClauseKind Kind;
2111     OpenMPScheduleClauseModifier M1;
2112     OpenMPScheduleClauseModifier M2;
2113     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2114                             OpenMPScheduleClauseModifier M1,
2115                             OpenMPScheduleClauseModifier M2)
2116         : Kind(Kind), M1(M1), M2(M2) {}
2117   };
2118 } // namespace
2119 
2120 bool CodeGenFunction::EmitOMPWorksharingLoop(
2121     const OMPLoopDirective &S, Expr *EUB,
2122     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2123     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2124   // Emit the loop iteration variable.
2125   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2126   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2127   EmitVarDecl(*IVDecl);
2128 
2129   // Emit the iterations count variable.
2130   // If it is not a variable, Sema decided to calculate iterations count on each
2131   // iteration (e.g., it is foldable into a constant).
2132   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2133     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2134     // Emit calculation of the iterations count.
2135     EmitIgnoredExpr(S.getCalcLastIteration());
2136   }
2137 
2138   auto &RT = CGM.getOpenMPRuntime();
2139 
2140   bool HasLastprivateClause;
2141   // Check pre-condition.
2142   {
2143     OMPLoopScope PreInitScope(*this, S);
2144     // Skip the entire loop if we don't meet the precondition.
2145     // If the condition constant folds and can be elided, avoid emitting the
2146     // whole loop.
2147     bool CondConstant;
2148     llvm::BasicBlock *ContBlock = nullptr;
2149     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2150       if (!CondConstant)
2151         return false;
2152     } else {
2153       auto *ThenBlock = createBasicBlock("omp.precond.then");
2154       ContBlock = createBasicBlock("omp.precond.end");
2155       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2156                   getProfileCount(&S));
2157       EmitBlock(ThenBlock);
2158       incrementProfileCounter(&S);
2159     }
2160 
2161     bool Ordered = false;
2162     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2163       if (OrderedClause->getNumForLoops())
2164         RT.emitDoacrossInit(*this, S);
2165       else
2166         Ordered = true;
2167     }
2168 
2169     llvm::DenseSet<const Expr *> EmittedFinals;
2170     emitAlignedClause(*this, S);
2171     EmitOMPLinearClauseInit(S);
2172     // Emit helper vars inits.
2173 
2174     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2175     LValue LB = Bounds.first;
2176     LValue UB = Bounds.second;
2177     LValue ST =
2178         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2179     LValue IL =
2180         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2181 
2182     // Emit 'then' code.
2183     {
2184       OMPPrivateScope LoopScope(*this);
2185       if (EmitOMPFirstprivateClause(S, LoopScope)) {
2186         // Emit implicit barrier to synchronize threads and avoid data races on
2187         // initialization of firstprivate variables and post-update of
2188         // lastprivate variables.
2189         CGM.getOpenMPRuntime().emitBarrierCall(
2190             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2191             /*ForceSimpleCall=*/true);
2192       }
2193       EmitOMPPrivateClause(S, LoopScope);
2194       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2195       EmitOMPReductionClauseInit(S, LoopScope);
2196       EmitOMPPrivateLoopCounters(S, LoopScope);
2197       EmitOMPLinearClause(S, LoopScope);
2198       (void)LoopScope.Privatize();
2199 
2200       // Detect the loop schedule kind and chunk.
2201       llvm::Value *Chunk = nullptr;
2202       OpenMPScheduleTy ScheduleKind;
2203       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2204         ScheduleKind.Schedule = C->getScheduleKind();
2205         ScheduleKind.M1 = C->getFirstScheduleModifier();
2206         ScheduleKind.M2 = C->getSecondScheduleModifier();
2207         if (const auto *Ch = C->getChunkSize()) {
2208           Chunk = EmitScalarExpr(Ch);
2209           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2210                                        S.getIterationVariable()->getType(),
2211                                        S.getLocStart());
2212         }
2213       }
2214       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2215       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2216       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2217       // If the static schedule kind is specified or if the ordered clause is
2218       // specified, and if no monotonic modifier is specified, the effect will
2219       // be as if the monotonic modifier was specified.
2220       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2221                                 /* Chunked */ Chunk != nullptr) &&
2222           !Ordered) {
2223         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2224           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2225         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2226         // When no chunk_size is specified, the iteration space is divided into
2227         // chunks that are approximately equal in size, and at most one chunk is
2228         // distributed to each thread. Note that the size of the chunks is
2229         // unspecified in this case.
2230         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
2231                              IVSize, IVSigned, Ordered,
2232                              IL.getAddress(), LB.getAddress(),
2233                              UB.getAddress(), ST.getAddress());
2234         auto LoopExit =
2235             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2236         // UB = min(UB, GlobalUB);
2237         EmitIgnoredExpr(S.getEnsureUpperBound());
2238         // IV = LB;
2239         EmitIgnoredExpr(S.getInit());
2240         // while (idx <= UB) { BODY; ++idx; }
2241         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2242                          S.getInc(),
2243                          [&S, LoopExit](CodeGenFunction &CGF) {
2244                            CGF.EmitOMPLoopBody(S, LoopExit);
2245                            CGF.EmitStopPoint(&S);
2246                          },
2247                          [](CodeGenFunction &) {});
2248         EmitBlock(LoopExit.getBlock());
2249         // Tell the runtime we are done.
2250         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2251           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
2252         };
2253         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2254       } else {
2255         const bool IsMonotonic =
2256             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2257             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2258             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2259             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2260         // Emit the outer loop, which requests its work chunk [LB..UB] from
2261         // runtime and runs the inner loop to process it.
2262         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2263                                              ST.getAddress(), IL.getAddress(),
2264                                              Chunk, EUB);
2265         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2266                             LoopArguments, CGDispatchBounds);
2267       }
2268       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2269         EmitOMPSimdFinal(S,
2270                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2271                            return CGF.Builder.CreateIsNotNull(
2272                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2273                          });
2274       }
2275       EmitOMPReductionClauseFinal(
2276           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2277                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2278                  : /*Parallel only*/ OMPD_parallel);
2279       // Emit post-update of the reduction variables if IsLastIter != 0.
2280       emitPostUpdateForReductionClause(
2281           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2282             return CGF.Builder.CreateIsNotNull(
2283                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2284           });
2285       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2286       if (HasLastprivateClause)
2287         EmitOMPLastprivateClauseFinal(
2288             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2289             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2290     }
2291     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2292       return CGF.Builder.CreateIsNotNull(
2293           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2294     });
2295     // We're now done with the loop, so jump to the continuation block.
2296     if (ContBlock) {
2297       EmitBranch(ContBlock);
2298       EmitBlock(ContBlock, true);
2299     }
2300   }
2301   return HasLastprivateClause;
2302 }
2303 
2304 /// The following two functions generate expressions for the loop lower
2305 /// and upper bounds in case of static and dynamic (dispatch) schedule
2306 /// of the associated 'for' or 'distribute' loop.
2307 static std::pair<LValue, LValue>
2308 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2309   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2310   LValue LB =
2311       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2312   LValue UB =
2313       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2314   return {LB, UB};
2315 }
2316 
2317 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2318 /// consider the lower and upper bound expressions generated by the
2319 /// worksharing loop support, but we use 0 and the iteration space size as
2320 /// constants
2321 static std::pair<llvm::Value *, llvm::Value *>
2322 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2323                           Address LB, Address UB) {
2324   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2325   const Expr *IVExpr = LS.getIterationVariable();
2326   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2327   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2328   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2329   return {LBVal, UBVal};
2330 }
2331 
2332 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2333   bool HasLastprivates = false;
2334   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2335                                           PrePostActionTy &) {
2336     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2337     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2338                                                  emitForLoopBounds,
2339                                                  emitDispatchForLoopBounds);
2340   };
2341   {
2342     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2343     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2344                                                 S.hasCancel());
2345   }
2346 
2347   // Emit an implicit barrier at the end.
2348   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2349     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2350   }
2351 }
2352 
2353 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2354   bool HasLastprivates = false;
2355   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2356                                           PrePostActionTy &) {
2357     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2358                                                  emitForLoopBounds,
2359                                                  emitDispatchForLoopBounds);
2360   };
2361   {
2362     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2363     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2364   }
2365 
2366   // Emit an implicit barrier at the end.
2367   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2368     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2369   }
2370 }
2371 
2372 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2373                                 const Twine &Name,
2374                                 llvm::Value *Init = nullptr) {
2375   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2376   if (Init)
2377     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2378   return LVal;
2379 }
2380 
2381 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2382   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
2383   auto *CS = dyn_cast<CompoundStmt>(Stmt);
2384   bool HasLastprivates = false;
2385   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
2386                                                     PrePostActionTy &) {
2387     auto &C = CGF.CGM.getContext();
2388     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2389     // Emit helper vars inits.
2390     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2391                                   CGF.Builder.getInt32(0));
2392     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2393                                       : CGF.Builder.getInt32(0);
2394     LValue UB =
2395         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2396     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2397                                   CGF.Builder.getInt32(1));
2398     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2399                                   CGF.Builder.getInt32(0));
2400     // Loop counter.
2401     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2402     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2403     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2404     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2405     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2406     // Generate condition for loop.
2407     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2408                         OK_Ordinary, S.getLocStart(), FPOptions());
2409     // Increment for loop counter.
2410     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2411                       S.getLocStart());
2412     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2413       // Iterate through all sections and emit a switch construct:
2414       // switch (IV) {
2415       //   case 0:
2416       //     <SectionStmt[0]>;
2417       //     break;
2418       // ...
2419       //   case <NumSection> - 1:
2420       //     <SectionStmt[<NumSection> - 1]>;
2421       //     break;
2422       // }
2423       // .omp.sections.exit:
2424       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2425       auto *SwitchStmt = CGF.Builder.CreateSwitch(
2426           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2427           CS == nullptr ? 1 : CS->size());
2428       if (CS) {
2429         unsigned CaseNumber = 0;
2430         for (auto *SubStmt : CS->children()) {
2431           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2432           CGF.EmitBlock(CaseBB);
2433           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2434           CGF.EmitStmt(SubStmt);
2435           CGF.EmitBranch(ExitBB);
2436           ++CaseNumber;
2437         }
2438       } else {
2439         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2440         CGF.EmitBlock(CaseBB);
2441         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2442         CGF.EmitStmt(Stmt);
2443         CGF.EmitBranch(ExitBB);
2444       }
2445       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2446     };
2447 
2448     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2449     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2450       // Emit implicit barrier to synchronize threads and avoid data races on
2451       // initialization of firstprivate variables and post-update of lastprivate
2452       // variables.
2453       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2454           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2455           /*ForceSimpleCall=*/true);
2456     }
2457     CGF.EmitOMPPrivateClause(S, LoopScope);
2458     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2459     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2460     (void)LoopScope.Privatize();
2461 
2462     // Emit static non-chunked loop.
2463     OpenMPScheduleTy ScheduleKind;
2464     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2465     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2466         CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
2467         /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
2468         UB.getAddress(), ST.getAddress());
2469     // UB = min(UB, GlobalUB);
2470     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2471     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2472         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2473     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2474     // IV = LB;
2475     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2476     // while (idx <= UB) { BODY; ++idx; }
2477     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2478                          [](CodeGenFunction &) {});
2479     // Tell the runtime we are done.
2480     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2481       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
2482     };
2483     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2484     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2485     // Emit post-update of the reduction variables if IsLastIter != 0.
2486     emitPostUpdateForReductionClause(
2487         CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2488           return CGF.Builder.CreateIsNotNull(
2489               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2490         });
2491 
2492     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2493     if (HasLastprivates)
2494       CGF.EmitOMPLastprivateClauseFinal(
2495           S, /*NoFinals=*/false,
2496           CGF.Builder.CreateIsNotNull(
2497               CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2498   };
2499 
2500   bool HasCancel = false;
2501   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2502     HasCancel = OSD->hasCancel();
2503   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2504     HasCancel = OPSD->hasCancel();
2505   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2506   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2507                                               HasCancel);
2508   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2509   // clause. Otherwise the barrier will be generated by the codegen for the
2510   // directive.
2511   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2512     // Emit implicit barrier to synchronize threads and avoid data races on
2513     // initialization of firstprivate variables.
2514     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2515                                            OMPD_unknown);
2516   }
2517 }
2518 
2519 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2520   {
2521     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2522     EmitSections(S);
2523   }
2524   // Emit an implicit barrier at the end.
2525   if (!S.getSingleClause<OMPNowaitClause>()) {
2526     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2527                                            OMPD_sections);
2528   }
2529 }
2530 
2531 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2532   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2533     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2534   };
2535   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2536   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2537                                               S.hasCancel());
2538 }
2539 
2540 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2541   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2542   llvm::SmallVector<const Expr *, 8> DestExprs;
2543   llvm::SmallVector<const Expr *, 8> SrcExprs;
2544   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2545   // Check if there are any 'copyprivate' clauses associated with this
2546   // 'single' construct.
2547   // Build a list of copyprivate variables along with helper expressions
2548   // (<source>, <destination>, <destination>=<source> expressions)
2549   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2550     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2551     DestExprs.append(C->destination_exprs().begin(),
2552                      C->destination_exprs().end());
2553     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2554     AssignmentOps.append(C->assignment_ops().begin(),
2555                          C->assignment_ops().end());
2556   }
2557   // Emit code for 'single' region along with 'copyprivate' clauses
2558   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2559     Action.Enter(CGF);
2560     OMPPrivateScope SingleScope(CGF);
2561     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2562     CGF.EmitOMPPrivateClause(S, SingleScope);
2563     (void)SingleScope.Privatize();
2564     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2565   };
2566   {
2567     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2568     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2569                                             CopyprivateVars, DestExprs,
2570                                             SrcExprs, AssignmentOps);
2571   }
2572   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2573   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2574   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2575     CGM.getOpenMPRuntime().emitBarrierCall(
2576         *this, S.getLocStart(),
2577         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2578   }
2579 }
2580 
2581 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2582   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2583     Action.Enter(CGF);
2584     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2585   };
2586   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2587   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2588 }
2589 
2590 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2591   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2592     Action.Enter(CGF);
2593     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2594   };
2595   Expr *Hint = nullptr;
2596   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2597     Hint = HintClause->getHint();
2598   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2599   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2600                                             S.getDirectiveName().getAsString(),
2601                                             CodeGen, S.getLocStart(), Hint);
2602 }
2603 
2604 void CodeGenFunction::EmitOMPParallelForDirective(
2605     const OMPParallelForDirective &S) {
2606   // Emit directive as a combined directive that consists of two implicit
2607   // directives: 'parallel' with 'for' directive.
2608   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2609     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2610     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2611                                emitDispatchForLoopBounds);
2612   };
2613   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2614                                  emitEmptyBoundParameters);
2615 }
2616 
2617 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2618     const OMPParallelForSimdDirective &S) {
2619   // Emit directive as a combined directive that consists of two implicit
2620   // directives: 'parallel' with 'for' directive.
2621   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2622     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2623                                emitDispatchForLoopBounds);
2624   };
2625   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2626                                  emitEmptyBoundParameters);
2627 }
2628 
2629 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2630     const OMPParallelSectionsDirective &S) {
2631   // Emit directive as a combined directive that consists of two implicit
2632   // directives: 'parallel' with 'sections' directive.
2633   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2634     CGF.EmitSections(S);
2635   };
2636   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2637                                  emitEmptyBoundParameters);
2638 }
2639 
2640 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2641                                                 const RegionCodeGenTy &BodyGen,
2642                                                 const TaskGenTy &TaskGen,
2643                                                 OMPTaskDataTy &Data) {
2644   // Emit outlined function for task construct.
2645   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2646   auto *I = CS->getCapturedDecl()->param_begin();
2647   auto *PartId = std::next(I);
2648   auto *TaskT = std::next(I, 4);
2649   // Check if the task is final
2650   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2651     // If the condition constant folds and can be elided, try to avoid emitting
2652     // the condition and the dead arm of the if/else.
2653     auto *Cond = Clause->getCondition();
2654     bool CondConstant;
2655     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2656       Data.Final.setInt(CondConstant);
2657     else
2658       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2659   } else {
2660     // By default the task is not final.
2661     Data.Final.setInt(/*IntVal=*/false);
2662   }
2663   // Check if the task has 'priority' clause.
2664   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2665     auto *Prio = Clause->getPriority();
2666     Data.Priority.setInt(/*IntVal=*/true);
2667     Data.Priority.setPointer(EmitScalarConversion(
2668         EmitScalarExpr(Prio), Prio->getType(),
2669         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2670         Prio->getExprLoc()));
2671   }
2672   // The first function argument for tasks is a thread id, the second one is a
2673   // part id (0 for tied tasks, >=0 for untied task).
2674   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2675   // Get list of private variables.
2676   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2677     auto IRef = C->varlist_begin();
2678     for (auto *IInit : C->private_copies()) {
2679       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2680       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2681         Data.PrivateVars.push_back(*IRef);
2682         Data.PrivateCopies.push_back(IInit);
2683       }
2684       ++IRef;
2685     }
2686   }
2687   EmittedAsPrivate.clear();
2688   // Get list of firstprivate variables.
2689   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2690     auto IRef = C->varlist_begin();
2691     auto IElemInitRef = C->inits().begin();
2692     for (auto *IInit : C->private_copies()) {
2693       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2694       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2695         Data.FirstprivateVars.push_back(*IRef);
2696         Data.FirstprivateCopies.push_back(IInit);
2697         Data.FirstprivateInits.push_back(*IElemInitRef);
2698       }
2699       ++IRef;
2700       ++IElemInitRef;
2701     }
2702   }
2703   // Get list of lastprivate variables (for taskloops).
2704   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2705   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2706     auto IRef = C->varlist_begin();
2707     auto ID = C->destination_exprs().begin();
2708     for (auto *IInit : C->private_copies()) {
2709       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2710       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2711         Data.LastprivateVars.push_back(*IRef);
2712         Data.LastprivateCopies.push_back(IInit);
2713       }
2714       LastprivateDstsOrigs.insert(
2715           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2716            cast<DeclRefExpr>(*IRef)});
2717       ++IRef;
2718       ++ID;
2719     }
2720   }
2721   SmallVector<const Expr *, 4> LHSs;
2722   SmallVector<const Expr *, 4> RHSs;
2723   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2724     auto IPriv = C->privates().begin();
2725     auto IRed = C->reduction_ops().begin();
2726     auto ILHS = C->lhs_exprs().begin();
2727     auto IRHS = C->rhs_exprs().begin();
2728     for (const auto *Ref : C->varlists()) {
2729       Data.ReductionVars.emplace_back(Ref);
2730       Data.ReductionCopies.emplace_back(*IPriv);
2731       Data.ReductionOps.emplace_back(*IRed);
2732       LHSs.emplace_back(*ILHS);
2733       RHSs.emplace_back(*IRHS);
2734       std::advance(IPriv, 1);
2735       std::advance(IRed, 1);
2736       std::advance(ILHS, 1);
2737       std::advance(IRHS, 1);
2738     }
2739   }
2740   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2741       *this, S.getLocStart(), LHSs, RHSs, Data);
2742   // Build list of dependences.
2743   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2744     for (auto *IRef : C->varlists())
2745       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2746   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2747       CodeGenFunction &CGF, PrePostActionTy &Action) {
2748     // Set proper addresses for generated private copies.
2749     OMPPrivateScope Scope(CGF);
2750     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2751         !Data.LastprivateVars.empty()) {
2752       auto *CopyFn = CGF.Builder.CreateLoad(
2753           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2754       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2755           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2756       // Map privates.
2757       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2758       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2759       CallArgs.push_back(PrivatesPtr);
2760       for (auto *E : Data.PrivateVars) {
2761         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2762         Address PrivatePtr = CGF.CreateMemTemp(
2763             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2764         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2765         CallArgs.push_back(PrivatePtr.getPointer());
2766       }
2767       for (auto *E : Data.FirstprivateVars) {
2768         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2769         Address PrivatePtr =
2770             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2771                               ".firstpriv.ptr.addr");
2772         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2773         CallArgs.push_back(PrivatePtr.getPointer());
2774       }
2775       for (auto *E : Data.LastprivateVars) {
2776         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2777         Address PrivatePtr =
2778             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2779                               ".lastpriv.ptr.addr");
2780         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2781         CallArgs.push_back(PrivatePtr.getPointer());
2782       }
2783       CGF.EmitRuntimeCall(CopyFn, CallArgs);
2784       for (auto &&Pair : LastprivateDstsOrigs) {
2785         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2786         DeclRefExpr DRE(
2787             const_cast<VarDecl *>(OrigVD),
2788             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2789                 OrigVD) != nullptr,
2790             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2791         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2792           return CGF.EmitLValue(&DRE).getAddress();
2793         });
2794       }
2795       for (auto &&Pair : PrivatePtrs) {
2796         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2797                             CGF.getContext().getDeclAlign(Pair.first));
2798         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2799       }
2800     }
2801     if (Data.Reductions) {
2802       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2803       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2804                              Data.ReductionOps);
2805       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2806           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2807       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2808         RedCG.emitSharedLValue(CGF, Cnt);
2809         RedCG.emitAggregateType(CGF, Cnt);
2810         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2811             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2812         Replacement =
2813             Address(CGF.EmitScalarConversion(
2814                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2815                         CGF.getContext().getPointerType(
2816                             Data.ReductionCopies[Cnt]->getType()),
2817                         SourceLocation()),
2818                     Replacement.getAlignment());
2819         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2820         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2821                          [Replacement]() { return Replacement; });
2822         // FIXME: This must removed once the runtime library is fixed.
2823         // Emit required threadprivate variables for
2824         // initilizer/combiner/finalizer.
2825         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2826                                                            RedCG, Cnt);
2827       }
2828     }
2829     // Privatize all private variables except for in_reduction items.
2830     (void)Scope.Privatize();
2831     SmallVector<const Expr *, 4> InRedVars;
2832     SmallVector<const Expr *, 4> InRedPrivs;
2833     SmallVector<const Expr *, 4> InRedOps;
2834     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2835     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2836       auto IPriv = C->privates().begin();
2837       auto IRed = C->reduction_ops().begin();
2838       auto ITD = C->taskgroup_descriptors().begin();
2839       for (const auto *Ref : C->varlists()) {
2840         InRedVars.emplace_back(Ref);
2841         InRedPrivs.emplace_back(*IPriv);
2842         InRedOps.emplace_back(*IRed);
2843         TaskgroupDescriptors.emplace_back(*ITD);
2844         std::advance(IPriv, 1);
2845         std::advance(IRed, 1);
2846         std::advance(ITD, 1);
2847       }
2848     }
2849     // Privatize in_reduction items here, because taskgroup descriptors must be
2850     // privatized earlier.
2851     OMPPrivateScope InRedScope(CGF);
2852     if (!InRedVars.empty()) {
2853       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2854       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2855         RedCG.emitSharedLValue(CGF, Cnt);
2856         RedCG.emitAggregateType(CGF, Cnt);
2857         // The taskgroup descriptor variable is always implicit firstprivate and
2858         // privatized already during procoessing of the firstprivates.
2859         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2860             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2861         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2862             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2863         Replacement = Address(
2864             CGF.EmitScalarConversion(
2865                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2866                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2867                 SourceLocation()),
2868             Replacement.getAlignment());
2869         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2870         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2871                               [Replacement]() { return Replacement; });
2872         // FIXME: This must removed once the runtime library is fixed.
2873         // Emit required threadprivate variables for
2874         // initilizer/combiner/finalizer.
2875         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2876                                                            RedCG, Cnt);
2877       }
2878     }
2879     (void)InRedScope.Privatize();
2880 
2881     Action.Enter(CGF);
2882     BodyGen(CGF);
2883   };
2884   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2885       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2886       Data.NumberOfParts);
2887   OMPLexicalScope Scope(*this, S);
2888   TaskGen(*this, OutlinedFn, Data);
2889 }
2890 
2891 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2892   // Emit outlined function for task construct.
2893   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2894   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2895   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2896   const Expr *IfCond = nullptr;
2897   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2898     if (C->getNameModifier() == OMPD_unknown ||
2899         C->getNameModifier() == OMPD_task) {
2900       IfCond = C->getCondition();
2901       break;
2902     }
2903   }
2904 
2905   OMPTaskDataTy Data;
2906   // Check if we should emit tied or untied task.
2907   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2908   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2909     CGF.EmitStmt(CS->getCapturedStmt());
2910   };
2911   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2912                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2913                             const OMPTaskDataTy &Data) {
2914     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2915                                             SharedsTy, CapturedStruct, IfCond,
2916                                             Data);
2917   };
2918   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2919 }
2920 
2921 void CodeGenFunction::EmitOMPTaskyieldDirective(
2922     const OMPTaskyieldDirective &S) {
2923   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2924 }
2925 
2926 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2927   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2928 }
2929 
2930 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2931   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2932 }
2933 
2934 void CodeGenFunction::EmitOMPTaskgroupDirective(
2935     const OMPTaskgroupDirective &S) {
2936   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2937     Action.Enter(CGF);
2938     if (const Expr *E = S.getReductionRef()) {
2939       SmallVector<const Expr *, 4> LHSs;
2940       SmallVector<const Expr *, 4> RHSs;
2941       OMPTaskDataTy Data;
2942       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2943         auto IPriv = C->privates().begin();
2944         auto IRed = C->reduction_ops().begin();
2945         auto ILHS = C->lhs_exprs().begin();
2946         auto IRHS = C->rhs_exprs().begin();
2947         for (const auto *Ref : C->varlists()) {
2948           Data.ReductionVars.emplace_back(Ref);
2949           Data.ReductionCopies.emplace_back(*IPriv);
2950           Data.ReductionOps.emplace_back(*IRed);
2951           LHSs.emplace_back(*ILHS);
2952           RHSs.emplace_back(*IRHS);
2953           std::advance(IPriv, 1);
2954           std::advance(IRed, 1);
2955           std::advance(ILHS, 1);
2956           std::advance(IRHS, 1);
2957         }
2958       }
2959       llvm::Value *ReductionDesc =
2960           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
2961                                                            LHSs, RHSs, Data);
2962       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2963       CGF.EmitVarDecl(*VD);
2964       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
2965                             /*Volatile=*/false, E->getType());
2966     }
2967     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2968   };
2969   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2970   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2971 }
2972 
2973 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2974   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2975     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2976       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2977                                 FlushClause->varlist_end());
2978     }
2979     return llvm::None;
2980   }(), S.getLocStart());
2981 }
2982 
2983 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
2984                                             const CodeGenLoopTy &CodeGenLoop,
2985                                             Expr *IncExpr) {
2986   // Emit the loop iteration variable.
2987   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2988   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2989   EmitVarDecl(*IVDecl);
2990 
2991   // Emit the iterations count variable.
2992   // If it is not a variable, Sema decided to calculate iterations count on each
2993   // iteration (e.g., it is foldable into a constant).
2994   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2995     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2996     // Emit calculation of the iterations count.
2997     EmitIgnoredExpr(S.getCalcLastIteration());
2998   }
2999 
3000   auto &RT = CGM.getOpenMPRuntime();
3001 
3002   bool HasLastprivateClause = false;
3003   // Check pre-condition.
3004   {
3005     OMPLoopScope PreInitScope(*this, S);
3006     // Skip the entire loop if we don't meet the precondition.
3007     // If the condition constant folds and can be elided, avoid emitting the
3008     // whole loop.
3009     bool CondConstant;
3010     llvm::BasicBlock *ContBlock = nullptr;
3011     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3012       if (!CondConstant)
3013         return;
3014     } else {
3015       auto *ThenBlock = createBasicBlock("omp.precond.then");
3016       ContBlock = createBasicBlock("omp.precond.end");
3017       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3018                   getProfileCount(&S));
3019       EmitBlock(ThenBlock);
3020       incrementProfileCounter(&S);
3021     }
3022 
3023     // Emit 'then' code.
3024     {
3025       // Emit helper vars inits.
3026 
3027       LValue LB = EmitOMPHelperVar(
3028           *this, cast<DeclRefExpr>(
3029                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3030                           ? S.getCombinedLowerBoundVariable()
3031                           : S.getLowerBoundVariable())));
3032       LValue UB = EmitOMPHelperVar(
3033           *this, cast<DeclRefExpr>(
3034                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3035                           ? S.getCombinedUpperBoundVariable()
3036                           : S.getUpperBoundVariable())));
3037       LValue ST =
3038           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3039       LValue IL =
3040           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3041 
3042       OMPPrivateScope LoopScope(*this);
3043       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3044         // Emit implicit barrier to synchronize threads and avoid data races on
3045         // initialization of firstprivate variables and post-update of
3046         // lastprivate variables.
3047         CGM.getOpenMPRuntime().emitBarrierCall(
3048           *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3049           /*ForceSimpleCall=*/true);
3050       }
3051       EmitOMPPrivateClause(S, LoopScope);
3052       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3053       EmitOMPPrivateLoopCounters(S, LoopScope);
3054       (void)LoopScope.Privatize();
3055 
3056       // Detect the distribute schedule kind and chunk.
3057       llvm::Value *Chunk = nullptr;
3058       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3059       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3060         ScheduleKind = C->getDistScheduleKind();
3061         if (const auto *Ch = C->getChunkSize()) {
3062           Chunk = EmitScalarExpr(Ch);
3063           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3064           S.getIterationVariable()->getType(),
3065           S.getLocStart());
3066         }
3067       }
3068       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3069       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3070 
3071       // OpenMP [2.10.8, distribute Construct, Description]
3072       // If dist_schedule is specified, kind must be static. If specified,
3073       // iterations are divided into chunks of size chunk_size, chunks are
3074       // assigned to the teams of the league in a round-robin fashion in the
3075       // order of the team number. When no chunk_size is specified, the
3076       // iteration space is divided into chunks that are approximately equal
3077       // in size, and at most one chunk is distributed to each team of the
3078       // league. The size of the chunks is unspecified in this case.
3079       if (RT.isStaticNonchunked(ScheduleKind,
3080                                 /* Chunked */ Chunk != nullptr)) {
3081         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3082                              IVSize, IVSigned, /* Ordered = */ false,
3083                              IL.getAddress(), LB.getAddress(),
3084                              UB.getAddress(), ST.getAddress());
3085         auto LoopExit =
3086             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3087         // UB = min(UB, GlobalUB);
3088         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3089                             ? S.getCombinedEnsureUpperBound()
3090                             : S.getEnsureUpperBound());
3091         // IV = LB;
3092         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3093                             ? S.getCombinedInit()
3094                             : S.getInit());
3095 
3096         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3097                          ? S.getCombinedCond()
3098                          : S.getCond();
3099 
3100         // for distribute alone,  codegen
3101         // while (idx <= UB) { BODY; ++idx; }
3102         // when combined with 'for' (e.g. as in 'distribute parallel for')
3103         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3104         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3105                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3106                            CodeGenLoop(CGF, S, LoopExit);
3107                          },
3108                          [](CodeGenFunction &) {});
3109         EmitBlock(LoopExit.getBlock());
3110         // Tell the runtime we are done.
3111         RT.emitForStaticFinish(*this, S.getLocStart());
3112       } else {
3113         // Emit the outer loop, which requests its work chunk [LB..UB] from
3114         // runtime and runs the inner loop to process it.
3115         const OMPLoopArguments LoopArguments = {
3116             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3117             Chunk};
3118         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3119                                    CodeGenLoop);
3120       }
3121 
3122       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3123       if (HasLastprivateClause)
3124         EmitOMPLastprivateClauseFinal(
3125             S, /*NoFinals=*/false,
3126             Builder.CreateIsNotNull(
3127                 EmitLoadOfScalar(IL, S.getLocStart())));
3128     }
3129 
3130     // We're now done with the loop, so jump to the continuation block.
3131     if (ContBlock) {
3132       EmitBranch(ContBlock);
3133       EmitBlock(ContBlock, true);
3134     }
3135   }
3136 }
3137 
3138 void CodeGenFunction::EmitOMPDistributeDirective(
3139     const OMPDistributeDirective &S) {
3140   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3141 
3142     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3143   };
3144   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3145   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3146                                               false);
3147 }
3148 
3149 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3150                                                    const CapturedStmt *S) {
3151   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3152   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3153   CGF.CapturedStmtInfo = &CapStmtInfo;
3154   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3155   Fn->addFnAttr(llvm::Attribute::NoInline);
3156   return Fn;
3157 }
3158 
3159 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3160   if (!S.getAssociatedStmt()) {
3161     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3162       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3163     return;
3164   }
3165   auto *C = S.getSingleClause<OMPSIMDClause>();
3166   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3167                                  PrePostActionTy &Action) {
3168     if (C) {
3169       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3170       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3171       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3172       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3173       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, OutlinedFn,
3174                                                       CapturedVars);
3175     } else {
3176       Action.Enter(CGF);
3177       CGF.EmitStmt(
3178           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3179     }
3180   };
3181   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3182   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3183 }
3184 
3185 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3186                                          QualType SrcType, QualType DestType,
3187                                          SourceLocation Loc) {
3188   assert(CGF.hasScalarEvaluationKind(DestType) &&
3189          "DestType must have scalar evaluation kind.");
3190   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3191   return Val.isScalar()
3192              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3193                                         Loc)
3194              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3195                                                  DestType, Loc);
3196 }
3197 
3198 static CodeGenFunction::ComplexPairTy
3199 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3200                       QualType DestType, SourceLocation Loc) {
3201   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3202          "DestType must have complex evaluation kind.");
3203   CodeGenFunction::ComplexPairTy ComplexVal;
3204   if (Val.isScalar()) {
3205     // Convert the input element to the element type of the complex.
3206     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3207     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3208                                               DestElementType, Loc);
3209     ComplexVal = CodeGenFunction::ComplexPairTy(
3210         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3211   } else {
3212     assert(Val.isComplex() && "Must be a scalar or complex.");
3213     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3214     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3215     ComplexVal.first = CGF.EmitScalarConversion(
3216         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3217     ComplexVal.second = CGF.EmitScalarConversion(
3218         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3219   }
3220   return ComplexVal;
3221 }
3222 
3223 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3224                                   LValue LVal, RValue RVal) {
3225   if (LVal.isGlobalReg()) {
3226     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3227   } else {
3228     CGF.EmitAtomicStore(RVal, LVal,
3229                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3230                                  : llvm::AtomicOrdering::Monotonic,
3231                         LVal.isVolatile(), /*IsInit=*/false);
3232   }
3233 }
3234 
3235 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3236                                          QualType RValTy, SourceLocation Loc) {
3237   switch (getEvaluationKind(LVal.getType())) {
3238   case TEK_Scalar:
3239     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3240                                *this, RVal, RValTy, LVal.getType(), Loc)),
3241                            LVal);
3242     break;
3243   case TEK_Complex:
3244     EmitStoreOfComplex(
3245         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3246         /*isInit=*/false);
3247     break;
3248   case TEK_Aggregate:
3249     llvm_unreachable("Must be a scalar or complex.");
3250   }
3251 }
3252 
3253 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3254                                   const Expr *X, const Expr *V,
3255                                   SourceLocation Loc) {
3256   // v = x;
3257   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3258   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3259   LValue XLValue = CGF.EmitLValue(X);
3260   LValue VLValue = CGF.EmitLValue(V);
3261   RValue Res = XLValue.isGlobalReg()
3262                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3263                    : CGF.EmitAtomicLoad(
3264                          XLValue, Loc,
3265                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3266                                   : llvm::AtomicOrdering::Monotonic,
3267                          XLValue.isVolatile());
3268   // OpenMP, 2.12.6, atomic Construct
3269   // Any atomic construct with a seq_cst clause forces the atomically
3270   // performed operation to include an implicit flush operation without a
3271   // list.
3272   if (IsSeqCst)
3273     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3274   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3275 }
3276 
3277 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3278                                    const Expr *X, const Expr *E,
3279                                    SourceLocation Loc) {
3280   // x = expr;
3281   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3282   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3283   // OpenMP, 2.12.6, atomic Construct
3284   // Any atomic construct with a seq_cst clause forces the atomically
3285   // performed operation to include an implicit flush operation without a
3286   // list.
3287   if (IsSeqCst)
3288     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3289 }
3290 
3291 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3292                                                 RValue Update,
3293                                                 BinaryOperatorKind BO,
3294                                                 llvm::AtomicOrdering AO,
3295                                                 bool IsXLHSInRHSPart) {
3296   auto &Context = CGF.CGM.getContext();
3297   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3298   // expression is simple and atomic is allowed for the given type for the
3299   // target platform.
3300   if (BO == BO_Comma || !Update.isScalar() ||
3301       !Update.getScalarVal()->getType()->isIntegerTy() ||
3302       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3303                         (Update.getScalarVal()->getType() !=
3304                          X.getAddress().getElementType())) ||
3305       !X.getAddress().getElementType()->isIntegerTy() ||
3306       !Context.getTargetInfo().hasBuiltinAtomic(
3307           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3308     return std::make_pair(false, RValue::get(nullptr));
3309 
3310   llvm::AtomicRMWInst::BinOp RMWOp;
3311   switch (BO) {
3312   case BO_Add:
3313     RMWOp = llvm::AtomicRMWInst::Add;
3314     break;
3315   case BO_Sub:
3316     if (!IsXLHSInRHSPart)
3317       return std::make_pair(false, RValue::get(nullptr));
3318     RMWOp = llvm::AtomicRMWInst::Sub;
3319     break;
3320   case BO_And:
3321     RMWOp = llvm::AtomicRMWInst::And;
3322     break;
3323   case BO_Or:
3324     RMWOp = llvm::AtomicRMWInst::Or;
3325     break;
3326   case BO_Xor:
3327     RMWOp = llvm::AtomicRMWInst::Xor;
3328     break;
3329   case BO_LT:
3330     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3331                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3332                                    : llvm::AtomicRMWInst::Max)
3333                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3334                                    : llvm::AtomicRMWInst::UMax);
3335     break;
3336   case BO_GT:
3337     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3338                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3339                                    : llvm::AtomicRMWInst::Min)
3340                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3341                                    : llvm::AtomicRMWInst::UMin);
3342     break;
3343   case BO_Assign:
3344     RMWOp = llvm::AtomicRMWInst::Xchg;
3345     break;
3346   case BO_Mul:
3347   case BO_Div:
3348   case BO_Rem:
3349   case BO_Shl:
3350   case BO_Shr:
3351   case BO_LAnd:
3352   case BO_LOr:
3353     return std::make_pair(false, RValue::get(nullptr));
3354   case BO_PtrMemD:
3355   case BO_PtrMemI:
3356   case BO_LE:
3357   case BO_GE:
3358   case BO_EQ:
3359   case BO_NE:
3360   case BO_AddAssign:
3361   case BO_SubAssign:
3362   case BO_AndAssign:
3363   case BO_OrAssign:
3364   case BO_XorAssign:
3365   case BO_MulAssign:
3366   case BO_DivAssign:
3367   case BO_RemAssign:
3368   case BO_ShlAssign:
3369   case BO_ShrAssign:
3370   case BO_Comma:
3371     llvm_unreachable("Unsupported atomic update operation");
3372   }
3373   auto *UpdateVal = Update.getScalarVal();
3374   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3375     UpdateVal = CGF.Builder.CreateIntCast(
3376         IC, X.getAddress().getElementType(),
3377         X.getType()->hasSignedIntegerRepresentation());
3378   }
3379   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3380   return std::make_pair(true, RValue::get(Res));
3381 }
3382 
3383 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3384     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3385     llvm::AtomicOrdering AO, SourceLocation Loc,
3386     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3387   // Update expressions are allowed to have the following forms:
3388   // x binop= expr; -> xrval + expr;
3389   // x++, ++x -> xrval + 1;
3390   // x--, --x -> xrval - 1;
3391   // x = x binop expr; -> xrval binop expr
3392   // x = expr Op x; - > expr binop xrval;
3393   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3394   if (!Res.first) {
3395     if (X.isGlobalReg()) {
3396       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3397       // 'xrval'.
3398       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3399     } else {
3400       // Perform compare-and-swap procedure.
3401       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3402     }
3403   }
3404   return Res;
3405 }
3406 
3407 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3408                                     const Expr *X, const Expr *E,
3409                                     const Expr *UE, bool IsXLHSInRHSPart,
3410                                     SourceLocation Loc) {
3411   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3412          "Update expr in 'atomic update' must be a binary operator.");
3413   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3414   // Update expressions are allowed to have the following forms:
3415   // x binop= expr; -> xrval + expr;
3416   // x++, ++x -> xrval + 1;
3417   // x--, --x -> xrval - 1;
3418   // x = x binop expr; -> xrval binop expr
3419   // x = expr Op x; - > expr binop xrval;
3420   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3421   LValue XLValue = CGF.EmitLValue(X);
3422   RValue ExprRValue = CGF.EmitAnyExpr(E);
3423   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3424                      : llvm::AtomicOrdering::Monotonic;
3425   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3426   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3427   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3428   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3429   auto Gen =
3430       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3431         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3432         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3433         return CGF.EmitAnyExpr(UE);
3434       };
3435   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3436       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3437   // OpenMP, 2.12.6, atomic Construct
3438   // Any atomic construct with a seq_cst clause forces the atomically
3439   // performed operation to include an implicit flush operation without a
3440   // list.
3441   if (IsSeqCst)
3442     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3443 }
3444 
3445 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3446                             QualType SourceType, QualType ResType,
3447                             SourceLocation Loc) {
3448   switch (CGF.getEvaluationKind(ResType)) {
3449   case TEK_Scalar:
3450     return RValue::get(
3451         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3452   case TEK_Complex: {
3453     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3454     return RValue::getComplex(Res.first, Res.second);
3455   }
3456   case TEK_Aggregate:
3457     break;
3458   }
3459   llvm_unreachable("Must be a scalar or complex.");
3460 }
3461 
3462 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3463                                      bool IsPostfixUpdate, const Expr *V,
3464                                      const Expr *X, const Expr *E,
3465                                      const Expr *UE, bool IsXLHSInRHSPart,
3466                                      SourceLocation Loc) {
3467   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3468   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3469   RValue NewVVal;
3470   LValue VLValue = CGF.EmitLValue(V);
3471   LValue XLValue = CGF.EmitLValue(X);
3472   RValue ExprRValue = CGF.EmitAnyExpr(E);
3473   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3474                      : llvm::AtomicOrdering::Monotonic;
3475   QualType NewVValType;
3476   if (UE) {
3477     // 'x' is updated with some additional value.
3478     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3479            "Update expr in 'atomic capture' must be a binary operator.");
3480     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3481     // Update expressions are allowed to have the following forms:
3482     // x binop= expr; -> xrval + expr;
3483     // x++, ++x -> xrval + 1;
3484     // x--, --x -> xrval - 1;
3485     // x = x binop expr; -> xrval binop expr
3486     // x = expr Op x; - > expr binop xrval;
3487     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3488     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3489     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3490     NewVValType = XRValExpr->getType();
3491     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3492     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3493                   IsPostfixUpdate](RValue XRValue) -> RValue {
3494       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3495       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3496       RValue Res = CGF.EmitAnyExpr(UE);
3497       NewVVal = IsPostfixUpdate ? XRValue : Res;
3498       return Res;
3499     };
3500     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3501         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3502     if (Res.first) {
3503       // 'atomicrmw' instruction was generated.
3504       if (IsPostfixUpdate) {
3505         // Use old value from 'atomicrmw'.
3506         NewVVal = Res.second;
3507       } else {
3508         // 'atomicrmw' does not provide new value, so evaluate it using old
3509         // value of 'x'.
3510         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3511         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3512         NewVVal = CGF.EmitAnyExpr(UE);
3513       }
3514     }
3515   } else {
3516     // 'x' is simply rewritten with some 'expr'.
3517     NewVValType = X->getType().getNonReferenceType();
3518     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3519                                X->getType().getNonReferenceType(), Loc);
3520     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3521       NewVVal = XRValue;
3522       return ExprRValue;
3523     };
3524     // Try to perform atomicrmw xchg, otherwise simple exchange.
3525     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3526         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3527         Loc, Gen);
3528     if (Res.first) {
3529       // 'atomicrmw' instruction was generated.
3530       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3531     }
3532   }
3533   // Emit post-update store to 'v' of old/new 'x' value.
3534   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3535   // OpenMP, 2.12.6, atomic Construct
3536   // Any atomic construct with a seq_cst clause forces the atomically
3537   // performed operation to include an implicit flush operation without a
3538   // list.
3539   if (IsSeqCst)
3540     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3541 }
3542 
3543 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3544                               bool IsSeqCst, bool IsPostfixUpdate,
3545                               const Expr *X, const Expr *V, const Expr *E,
3546                               const Expr *UE, bool IsXLHSInRHSPart,
3547                               SourceLocation Loc) {
3548   switch (Kind) {
3549   case OMPC_read:
3550     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3551     break;
3552   case OMPC_write:
3553     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3554     break;
3555   case OMPC_unknown:
3556   case OMPC_update:
3557     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3558     break;
3559   case OMPC_capture:
3560     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3561                              IsXLHSInRHSPart, Loc);
3562     break;
3563   case OMPC_if:
3564   case OMPC_final:
3565   case OMPC_num_threads:
3566   case OMPC_private:
3567   case OMPC_firstprivate:
3568   case OMPC_lastprivate:
3569   case OMPC_reduction:
3570   case OMPC_task_reduction:
3571   case OMPC_in_reduction:
3572   case OMPC_safelen:
3573   case OMPC_simdlen:
3574   case OMPC_collapse:
3575   case OMPC_default:
3576   case OMPC_seq_cst:
3577   case OMPC_shared:
3578   case OMPC_linear:
3579   case OMPC_aligned:
3580   case OMPC_copyin:
3581   case OMPC_copyprivate:
3582   case OMPC_flush:
3583   case OMPC_proc_bind:
3584   case OMPC_schedule:
3585   case OMPC_ordered:
3586   case OMPC_nowait:
3587   case OMPC_untied:
3588   case OMPC_threadprivate:
3589   case OMPC_depend:
3590   case OMPC_mergeable:
3591   case OMPC_device:
3592   case OMPC_threads:
3593   case OMPC_simd:
3594   case OMPC_map:
3595   case OMPC_num_teams:
3596   case OMPC_thread_limit:
3597   case OMPC_priority:
3598   case OMPC_grainsize:
3599   case OMPC_nogroup:
3600   case OMPC_num_tasks:
3601   case OMPC_hint:
3602   case OMPC_dist_schedule:
3603   case OMPC_defaultmap:
3604   case OMPC_uniform:
3605   case OMPC_to:
3606   case OMPC_from:
3607   case OMPC_use_device_ptr:
3608   case OMPC_is_device_ptr:
3609     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3610   }
3611 }
3612 
3613 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3614   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3615   OpenMPClauseKind Kind = OMPC_unknown;
3616   for (auto *C : S.clauses()) {
3617     // Find first clause (skip seq_cst clause, if it is first).
3618     if (C->getClauseKind() != OMPC_seq_cst) {
3619       Kind = C->getClauseKind();
3620       break;
3621     }
3622   }
3623 
3624   const auto *CS =
3625       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3626   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3627     enterFullExpression(EWC);
3628   }
3629   // Processing for statements under 'atomic capture'.
3630   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3631     for (const auto *C : Compound->body()) {
3632       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3633         enterFullExpression(EWC);
3634       }
3635     }
3636   }
3637 
3638   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3639                                             PrePostActionTy &) {
3640     CGF.EmitStopPoint(CS);
3641     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3642                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3643                       S.isXLHSInRHSPart(), S.getLocStart());
3644   };
3645   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3646   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3647 }
3648 
3649 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3650                                          const OMPExecutableDirective &S,
3651                                          const RegionCodeGenTy &CodeGen) {
3652   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3653   CodeGenModule &CGM = CGF.CGM;
3654   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3655 
3656   llvm::Function *Fn = nullptr;
3657   llvm::Constant *FnID = nullptr;
3658 
3659   const Expr *IfCond = nullptr;
3660   // Check for the at most one if clause associated with the target region.
3661   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3662     if (C->getNameModifier() == OMPD_unknown ||
3663         C->getNameModifier() == OMPD_target) {
3664       IfCond = C->getCondition();
3665       break;
3666     }
3667   }
3668 
3669   // Check if we have any device clause associated with the directive.
3670   const Expr *Device = nullptr;
3671   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3672     Device = C->getDevice();
3673   }
3674 
3675   // Check if we have an if clause whose conditional always evaluates to false
3676   // or if we do not have any targets specified. If so the target region is not
3677   // an offload entry point.
3678   bool IsOffloadEntry = true;
3679   if (IfCond) {
3680     bool Val;
3681     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3682       IsOffloadEntry = false;
3683   }
3684   if (CGM.getLangOpts().OMPTargetTriples.empty())
3685     IsOffloadEntry = false;
3686 
3687   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3688   StringRef ParentName;
3689   // In case we have Ctors/Dtors we use the complete type variant to produce
3690   // the mangling of the device outlined kernel.
3691   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3692     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3693   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3694     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3695   else
3696     ParentName =
3697         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3698 
3699   // Emit target region as a standalone region.
3700   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3701                                                     IsOffloadEntry, CodeGen);
3702   OMPLexicalScope Scope(CGF, S);
3703   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3704   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3705   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3706                                         CapturedVars);
3707 }
3708 
3709 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3710                              PrePostActionTy &Action) {
3711   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3712   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3713   CGF.EmitOMPPrivateClause(S, PrivateScope);
3714   (void)PrivateScope.Privatize();
3715 
3716   Action.Enter(CGF);
3717   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3718 }
3719 
3720 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3721                                                   StringRef ParentName,
3722                                                   const OMPTargetDirective &S) {
3723   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3724     emitTargetRegion(CGF, S, Action);
3725   };
3726   llvm::Function *Fn;
3727   llvm::Constant *Addr;
3728   // Emit target region as a standalone region.
3729   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3730       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3731   assert(Fn && Addr && "Target device function emission failed.");
3732 }
3733 
3734 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3735   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3736     emitTargetRegion(CGF, S, Action);
3737   };
3738   emitCommonOMPTargetDirective(*this, S, CodeGen);
3739 }
3740 
3741 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3742                                         const OMPExecutableDirective &S,
3743                                         OpenMPDirectiveKind InnermostKind,
3744                                         const RegionCodeGenTy &CodeGen) {
3745   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3746   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3747       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3748 
3749   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3750   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3751   if (NT || TL) {
3752     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3753     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3754 
3755     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3756                                                   S.getLocStart());
3757   }
3758 
3759   OMPTeamsScope Scope(CGF, S);
3760   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3761   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3762   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3763                                            CapturedVars);
3764 }
3765 
3766 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3767   // Emit teams region as a standalone region.
3768   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3769     OMPPrivateScope PrivateScope(CGF);
3770     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3771     CGF.EmitOMPPrivateClause(S, PrivateScope);
3772     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3773     (void)PrivateScope.Privatize();
3774     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3775     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3776   };
3777   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3778   emitPostUpdateForReductionClause(
3779       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3780 }
3781 
3782 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3783                                   const OMPTargetTeamsDirective &S) {
3784   auto *CS = S.getCapturedStmt(OMPD_teams);
3785   Action.Enter(CGF);
3786   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3787     // TODO: Add support for clauses.
3788     CGF.EmitStmt(CS->getCapturedStmt());
3789   };
3790   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3791 }
3792 
3793 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3794     CodeGenModule &CGM, StringRef ParentName,
3795     const OMPTargetTeamsDirective &S) {
3796   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3797     emitTargetTeamsRegion(CGF, Action, S);
3798   };
3799   llvm::Function *Fn;
3800   llvm::Constant *Addr;
3801   // Emit target region as a standalone region.
3802   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3803       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3804   assert(Fn && Addr && "Target device function emission failed.");
3805 }
3806 
3807 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3808     const OMPTargetTeamsDirective &S) {
3809   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3810     emitTargetTeamsRegion(CGF, Action, S);
3811   };
3812   emitCommonOMPTargetDirective(*this, S, CodeGen);
3813 }
3814 
3815 void CodeGenFunction::EmitOMPCancellationPointDirective(
3816     const OMPCancellationPointDirective &S) {
3817   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3818                                                    S.getCancelRegion());
3819 }
3820 
3821 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3822   const Expr *IfCond = nullptr;
3823   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3824     if (C->getNameModifier() == OMPD_unknown ||
3825         C->getNameModifier() == OMPD_cancel) {
3826       IfCond = C->getCondition();
3827       break;
3828     }
3829   }
3830   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3831                                         S.getCancelRegion());
3832 }
3833 
3834 CodeGenFunction::JumpDest
3835 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3836   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3837       Kind == OMPD_target_parallel)
3838     return ReturnBlock;
3839   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3840          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3841          Kind == OMPD_distribute_parallel_for ||
3842          Kind == OMPD_target_parallel_for);
3843   return OMPCancelStack.getExitBlock();
3844 }
3845 
3846 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3847     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3848     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3849   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3850   auto OrigVarIt = C.varlist_begin();
3851   auto InitIt = C.inits().begin();
3852   for (auto PvtVarIt : C.private_copies()) {
3853     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3854     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3855     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3856 
3857     // In order to identify the right initializer we need to match the
3858     // declaration used by the mapping logic. In some cases we may get
3859     // OMPCapturedExprDecl that refers to the original declaration.
3860     const ValueDecl *MatchingVD = OrigVD;
3861     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3862       // OMPCapturedExprDecl are used to privative fields of the current
3863       // structure.
3864       auto *ME = cast<MemberExpr>(OED->getInit());
3865       assert(isa<CXXThisExpr>(ME->getBase()) &&
3866              "Base should be the current struct!");
3867       MatchingVD = ME->getMemberDecl();
3868     }
3869 
3870     // If we don't have information about the current list item, move on to
3871     // the next one.
3872     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3873     if (InitAddrIt == CaptureDeviceAddrMap.end())
3874       continue;
3875 
3876     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3877       // Initialize the temporary initialization variable with the address we
3878       // get from the runtime library. We have to cast the source address
3879       // because it is always a void *. References are materialized in the
3880       // privatization scope, so the initialization here disregards the fact
3881       // the original variable is a reference.
3882       QualType AddrQTy =
3883           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3884       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3885       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3886       setAddrOfLocalVar(InitVD, InitAddr);
3887 
3888       // Emit private declaration, it will be initialized by the value we
3889       // declaration we just added to the local declarations map.
3890       EmitDecl(*PvtVD);
3891 
3892       // The initialization variables reached its purpose in the emission
3893       // ofthe previous declaration, so we don't need it anymore.
3894       LocalDeclMap.erase(InitVD);
3895 
3896       // Return the address of the private variable.
3897       return GetAddrOfLocalVar(PvtVD);
3898     });
3899     assert(IsRegistered && "firstprivate var already registered as private");
3900     // Silence the warning about unused variable.
3901     (void)IsRegistered;
3902 
3903     ++OrigVarIt;
3904     ++InitIt;
3905   }
3906 }
3907 
3908 // Generate the instructions for '#pragma omp target data' directive.
3909 void CodeGenFunction::EmitOMPTargetDataDirective(
3910     const OMPTargetDataDirective &S) {
3911   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
3912 
3913   // Create a pre/post action to signal the privatization of the device pointer.
3914   // This action can be replaced by the OpenMP runtime code generation to
3915   // deactivate privatization.
3916   bool PrivatizeDevicePointers = false;
3917   class DevicePointerPrivActionTy : public PrePostActionTy {
3918     bool &PrivatizeDevicePointers;
3919 
3920   public:
3921     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
3922         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
3923     void Enter(CodeGenFunction &CGF) override {
3924       PrivatizeDevicePointers = true;
3925     }
3926   };
3927   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
3928 
3929   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
3930       CodeGenFunction &CGF, PrePostActionTy &Action) {
3931     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3932       CGF.EmitStmt(
3933           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3934     };
3935 
3936     // Codegen that selects wheather to generate the privatization code or not.
3937     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
3938                           &InnermostCodeGen](CodeGenFunction &CGF,
3939                                              PrePostActionTy &Action) {
3940       RegionCodeGenTy RCG(InnermostCodeGen);
3941       PrivatizeDevicePointers = false;
3942 
3943       // Call the pre-action to change the status of PrivatizeDevicePointers if
3944       // needed.
3945       Action.Enter(CGF);
3946 
3947       if (PrivatizeDevicePointers) {
3948         OMPPrivateScope PrivateScope(CGF);
3949         // Emit all instances of the use_device_ptr clause.
3950         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
3951           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
3952                                         Info.CaptureDeviceAddrMap);
3953         (void)PrivateScope.Privatize();
3954         RCG(CGF);
3955       } else
3956         RCG(CGF);
3957     };
3958 
3959     // Forward the provided action to the privatization codegen.
3960     RegionCodeGenTy PrivRCG(PrivCodeGen);
3961     PrivRCG.setAction(Action);
3962 
3963     // Notwithstanding the body of the region is emitted as inlined directive,
3964     // we don't use an inline scope as changes in the references inside the
3965     // region are expected to be visible outside, so we do not privative them.
3966     OMPLexicalScope Scope(CGF, S);
3967     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
3968                                                     PrivRCG);
3969   };
3970 
3971   RegionCodeGenTy RCG(CodeGen);
3972 
3973   // If we don't have target devices, don't bother emitting the data mapping
3974   // code.
3975   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
3976     RCG(*this);
3977     return;
3978   }
3979 
3980   // Check if we have any if clause associated with the directive.
3981   const Expr *IfCond = nullptr;
3982   if (auto *C = S.getSingleClause<OMPIfClause>())
3983     IfCond = C->getCondition();
3984 
3985   // Check if we have any device clause associated with the directive.
3986   const Expr *Device = nullptr;
3987   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3988     Device = C->getDevice();
3989 
3990   // Set the action to signal privatization of device pointers.
3991   RCG.setAction(PrivAction);
3992 
3993   // Emit region code.
3994   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
3995                                              Info);
3996 }
3997 
3998 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
3999     const OMPTargetEnterDataDirective &S) {
4000   // If we don't have target devices, don't bother emitting the data mapping
4001   // code.
4002   if (CGM.getLangOpts().OMPTargetTriples.empty())
4003     return;
4004 
4005   // Check if we have any if clause associated with the directive.
4006   const Expr *IfCond = nullptr;
4007   if (auto *C = S.getSingleClause<OMPIfClause>())
4008     IfCond = C->getCondition();
4009 
4010   // Check if we have any device clause associated with the directive.
4011   const Expr *Device = nullptr;
4012   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4013     Device = C->getDevice();
4014 
4015   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4016 }
4017 
4018 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4019     const OMPTargetExitDataDirective &S) {
4020   // If we don't have target devices, don't bother emitting the data mapping
4021   // code.
4022   if (CGM.getLangOpts().OMPTargetTriples.empty())
4023     return;
4024 
4025   // Check if we have any if clause associated with the directive.
4026   const Expr *IfCond = nullptr;
4027   if (auto *C = S.getSingleClause<OMPIfClause>())
4028     IfCond = C->getCondition();
4029 
4030   // Check if we have any device clause associated with the directive.
4031   const Expr *Device = nullptr;
4032   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4033     Device = C->getDevice();
4034 
4035   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4036 }
4037 
4038 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4039                                      const OMPTargetParallelDirective &S,
4040                                      PrePostActionTy &Action) {
4041   // Get the captured statement associated with the 'parallel' region.
4042   auto *CS = S.getCapturedStmt(OMPD_parallel);
4043   Action.Enter(CGF);
4044   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4045     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4046     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4047     CGF.EmitOMPPrivateClause(S, PrivateScope);
4048     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4049     (void)PrivateScope.Privatize();
4050     // TODO: Add support for clauses.
4051     CGF.EmitStmt(CS->getCapturedStmt());
4052     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4053   };
4054   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4055                                  emitEmptyBoundParameters);
4056   emitPostUpdateForReductionClause(
4057       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4058 }
4059 
4060 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4061     CodeGenModule &CGM, StringRef ParentName,
4062     const OMPTargetParallelDirective &S) {
4063   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4064     emitTargetParallelRegion(CGF, S, Action);
4065   };
4066   llvm::Function *Fn;
4067   llvm::Constant *Addr;
4068   // Emit target region as a standalone region.
4069   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4070       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4071   assert(Fn && Addr && "Target device function emission failed.");
4072 }
4073 
4074 void CodeGenFunction::EmitOMPTargetParallelDirective(
4075     const OMPTargetParallelDirective &S) {
4076   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4077     emitTargetParallelRegion(CGF, S, Action);
4078   };
4079   emitCommonOMPTargetDirective(*this, S, CodeGen);
4080 }
4081 
4082 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4083     const OMPTargetParallelForDirective &S) {
4084   // TODO: codegen for target parallel for.
4085 }
4086 
4087 /// Emit a helper variable and return corresponding lvalue.
4088 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4089                      const ImplicitParamDecl *PVD,
4090                      CodeGenFunction::OMPPrivateScope &Privates) {
4091   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4092   Privates.addPrivate(
4093       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4094 }
4095 
4096 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4097   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4098   // Emit outlined function for task construct.
4099   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
4100   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4101   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4102   const Expr *IfCond = nullptr;
4103   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4104     if (C->getNameModifier() == OMPD_unknown ||
4105         C->getNameModifier() == OMPD_taskloop) {
4106       IfCond = C->getCondition();
4107       break;
4108     }
4109   }
4110 
4111   OMPTaskDataTy Data;
4112   // Check if taskloop must be emitted without taskgroup.
4113   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4114   // TODO: Check if we should emit tied or untied task.
4115   Data.Tied = true;
4116   // Set scheduling for taskloop
4117   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4118     // grainsize clause
4119     Data.Schedule.setInt(/*IntVal=*/false);
4120     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4121   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4122     // num_tasks clause
4123     Data.Schedule.setInt(/*IntVal=*/true);
4124     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4125   }
4126 
4127   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4128     // if (PreCond) {
4129     //   for (IV in 0..LastIteration) BODY;
4130     //   <Final counter/linear vars updates>;
4131     // }
4132     //
4133 
4134     // Emit: if (PreCond) - begin.
4135     // If the condition constant folds and can be elided, avoid emitting the
4136     // whole loop.
4137     bool CondConstant;
4138     llvm::BasicBlock *ContBlock = nullptr;
4139     OMPLoopScope PreInitScope(CGF, S);
4140     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4141       if (!CondConstant)
4142         return;
4143     } else {
4144       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4145       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4146       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4147                   CGF.getProfileCount(&S));
4148       CGF.EmitBlock(ThenBlock);
4149       CGF.incrementProfileCounter(&S);
4150     }
4151 
4152     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4153       CGF.EmitOMPSimdInit(S);
4154 
4155     OMPPrivateScope LoopScope(CGF);
4156     // Emit helper vars inits.
4157     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4158     auto *I = CS->getCapturedDecl()->param_begin();
4159     auto *LBP = std::next(I, LowerBound);
4160     auto *UBP = std::next(I, UpperBound);
4161     auto *STP = std::next(I, Stride);
4162     auto *LIP = std::next(I, LastIter);
4163     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4164              LoopScope);
4165     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4166              LoopScope);
4167     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4168     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4169              LoopScope);
4170     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4171     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4172     (void)LoopScope.Privatize();
4173     // Emit the loop iteration variable.
4174     const Expr *IVExpr = S.getIterationVariable();
4175     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4176     CGF.EmitVarDecl(*IVDecl);
4177     CGF.EmitIgnoredExpr(S.getInit());
4178 
4179     // Emit the iterations count variable.
4180     // If it is not a variable, Sema decided to calculate iterations count on
4181     // each iteration (e.g., it is foldable into a constant).
4182     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4183       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4184       // Emit calculation of the iterations count.
4185       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4186     }
4187 
4188     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4189                          S.getInc(),
4190                          [&S](CodeGenFunction &CGF) {
4191                            CGF.EmitOMPLoopBody(S, JumpDest());
4192                            CGF.EmitStopPoint(&S);
4193                          },
4194                          [](CodeGenFunction &) {});
4195     // Emit: if (PreCond) - end.
4196     if (ContBlock) {
4197       CGF.EmitBranch(ContBlock);
4198       CGF.EmitBlock(ContBlock, true);
4199     }
4200     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4201     if (HasLastprivateClause) {
4202       CGF.EmitOMPLastprivateClauseFinal(
4203           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4204           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4205               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4206               (*LIP)->getType(), S.getLocStart())));
4207     }
4208   };
4209   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4210                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4211                             const OMPTaskDataTy &Data) {
4212     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4213       OMPLoopScope PreInitScope(CGF, S);
4214       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4215                                                   OutlinedFn, SharedsTy,
4216                                                   CapturedStruct, IfCond, Data);
4217     };
4218     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4219                                                     CodeGen);
4220   };
4221   if (Data.Nogroup)
4222     EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4223   else {
4224     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4225         *this,
4226         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4227                                         PrePostActionTy &Action) {
4228           Action.Enter(CGF);
4229           CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4230         },
4231         S.getLocStart());
4232   }
4233 }
4234 
4235 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4236   EmitOMPTaskLoopBasedDirective(S);
4237 }
4238 
4239 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4240     const OMPTaskLoopSimdDirective &S) {
4241   EmitOMPTaskLoopBasedDirective(S);
4242 }
4243 
4244 // Generate the instructions for '#pragma omp target update' directive.
4245 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4246     const OMPTargetUpdateDirective &S) {
4247   // If we don't have target devices, don't bother emitting the data mapping
4248   // code.
4249   if (CGM.getLangOpts().OMPTargetTriples.empty())
4250     return;
4251 
4252   // Check if we have any if clause associated with the directive.
4253   const Expr *IfCond = nullptr;
4254   if (auto *C = S.getSingleClause<OMPIfClause>())
4255     IfCond = C->getCondition();
4256 
4257   // Check if we have any device clause associated with the directive.
4258   const Expr *Device = nullptr;
4259   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4260     Device = C->getDevice();
4261 
4262   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4263 }
4264