1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             assert(VD == VD->getCanonicalDecl() &&
69                         "Canonical decl must be captured.");
70             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
71                             isCapturedVar(CGF, VD) ||
72                                 (CGF.CapturedStmtInfo &&
73                                  InlinedShareds.isGlobalVarCaptured(VD)),
74                             VD->getType().getNonReferenceType(), VK_LValue,
75                             SourceLocation());
76             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
77               return CGF.EmitLValue(&DRE).getAddress();
78             });
79           }
80         }
81         (void)InlinedShareds.Privatize();
82       }
83     }
84   }
85 };
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S,
100                         /*AsInlined=*/false,
101                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S,
116                         /*AsInlined=*/false,
117                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 };
119 
120 /// Private scope for OpenMP loop-based directives, that supports capturing
121 /// of used expression from loop statement.
122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
123   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
124     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
125       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
126         for (const auto *I : PreInits->decls())
127           CGF.EmitVarDecl(cast<VarDecl>(*I));
128       }
129     }
130   }
131 
132 public:
133   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
134       : CodeGenFunction::RunCleanupsScope(CGF) {
135     emitPreInitStmt(CGF, S);
136   }
137 };
138 
139 } // namespace
140 
/// Forward declaration only; the definition is not part of the code shown
/// here.
/// NOTE(review): judging by the name and parameters this is the shared helper
/// for emitting target directives, with the region body produced by
/// \p CodeGen -- confirm at the definition.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);
144 
145 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
146   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
147     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
148       OrigVD = OrigVD->getCanonicalDecl();
149       bool IsCaptured =
150           LambdaCaptureFields.lookup(OrigVD) ||
151           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
152           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
153       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
154                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
155       return EmitLValue(&DRE);
156     }
157   }
158   return EmitLValue(E);
159 }
160 
161 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
162   auto &C = getContext();
163   llvm::Value *Size = nullptr;
164   auto SizeInChars = C.getTypeSizeInChars(Ty);
165   if (SizeInChars.isZero()) {
166     // getTypeSizeInChars() returns 0 for a VLA.
167     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
168       llvm::Value *ArraySize;
169       std::tie(ArraySize, Ty) = getVLASize(VAT);
170       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
171     }
172     SizeInChars = C.getTypeSizeInChars(Ty);
173     if (SizeInChars.isZero())
174       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
175     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
176   } else
177     Size = CGM.getSize(SizeInChars);
178   return Size;
179 }
180 
181 void CodeGenFunction::GenerateOpenMPCapturedVars(
182     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
183   const RecordDecl *RD = S.getCapturedRecordDecl();
184   auto CurField = RD->field_begin();
185   auto CurCap = S.captures().begin();
186   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
187                                                  E = S.capture_init_end();
188        I != E; ++I, ++CurField, ++CurCap) {
189     if (CurField->hasCapturedVLAType()) {
190       auto VAT = CurField->getCapturedVLAType();
191       auto *Val = VLASizeMap[VAT->getSizeExpr()];
192       CapturedVars.push_back(Val);
193     } else if (CurCap->capturesThis())
194       CapturedVars.push_back(CXXThisValue);
195     else if (CurCap->capturesVariableByCopy()) {
196       llvm::Value *CV =
197           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
198 
199       // If the field is not a pointer, we need to save the actual value
200       // and load it as a void pointer.
201       if (!CurField->getType()->isAnyPointerType()) {
202         auto &Ctx = getContext();
203         auto DstAddr = CreateMemTemp(
204             Ctx.getUIntPtrType(),
205             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
206         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
207 
208         auto *SrcAddrVal = EmitScalarConversion(
209             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
210             Ctx.getPointerType(CurField->getType()), SourceLocation());
211         LValue SrcLV =
212             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
213 
214         // Store the value using the source type pointer.
215         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
216 
217         // Load the value using the destination type pointer.
218         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
219       }
220       CapturedVars.push_back(CV);
221     } else {
222       assert(CurCap->capturesVariable() && "Expected capture by reference.");
223       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
224     }
225   }
226 }
227 
228 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
229                                     StringRef Name, LValue AddrLV,
230                                     bool isReferenceType = false) {
231   ASTContext &Ctx = CGF.getContext();
232 
233   auto *CastedPtr = CGF.EmitScalarConversion(
234       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
235       Ctx.getPointerType(DstType), SourceLocation());
236   auto TmpAddr =
237       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
238           .getAddress();
239 
240   // If we are dealing with references we need to return the address of the
241   // reference instead of the reference of the value.
242   if (isReferenceType) {
243     QualType RefType = Ctx.getLValueReferenceType(DstType);
244     auto *RefVal = TmpAddr.getPointer();
245     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
246     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
247     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
248   }
249 
250   return TmpAddr;
251 }
252 
253 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
254   if (T->isLValueReferenceType()) {
255     return C.getLValueReferenceType(
256         getCanonicalParamType(C, T.getNonReferenceType()),
257         /*SpelledAsLValue=*/false);
258   }
259   if (T->isPointerType())
260     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
261   if (auto *A = T->getAsArrayTypeUnsafe()) {
262     if (auto *VLA = dyn_cast<VariableArrayType>(A))
263       return getCanonicalParamType(C, VLA->getElementType());
264     else if (!A->isVariablyModifiedType())
265       return C.getCanonicalType(T);
266   }
267   return C.getCanonicalParamType(T);
268 }
269 
270 namespace {
271   /// Contains required data for proper outlined function codegen.
272   struct FunctionOptions {
273     /// Captured statement for which the function is generated.
274     const CapturedStmt *S = nullptr;
275     /// true if cast to/from  UIntPtr is required for variables captured by
276     /// value.
277     const bool UIntPtrCastRequired = true;
278     /// true if only casted arguments must be registered as local args or VLA
279     /// sizes.
280     const bool RegisterCastedArgsOnly = false;
281     /// Name of the generated function.
282     const StringRef FunctionName;
283     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
284                              bool RegisterCastedArgsOnly,
285                              StringRef FunctionName)
286         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
287           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
288           FunctionName(FunctionName) {}
289   };
290 }
291 
/// Builds the parameter list of the outlined function for the captured
/// statement FO.S, creates the llvm::Function, starts emitting it in \p CGF
/// and works out, for every captured entity, where its value or address can be
/// found inside the new function.
///
/// \param CGF Code generator in which the new function is started.
/// \param Args [out] Parameters of the outlined function: the parameters of
///   the captured declaration that precede its context parameter, one implicit
///   parameter per field of the captured record, then the parameters that
///   follow the context parameter.
/// \param LocalAddrs [out] Maps a registered argument to the captured variable
///   it stands for (null for 'this') and the address to use for it.
/// \param VLASizes [out] Maps an argument that carries a captured VLA size to
///   the size expression and the loaded size value.
/// \param CXXThisValue [out] Reset to null, then set to the loaded 'this'
///   pointer if 'this' is captured.
/// \param FO Captured statement, function name and casting/registration
///   options.
/// \returns The newly created function, which has internal linkage and is
///   named FO.FunctionName.
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  // TargetArgs is what the function is actually declared with; it differs from
  // Args only when parameters are translated by the OpenMP runtime below.
  FunctionArgList TargetArgs;
  // Start with the parameters that precede the context parameter.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  // The captures are walked in lock step with the fields of the captured
  // record; each field yields one implicit parameter.
  auto I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    // Pick the parameter name: the captured variable's identifier, "this", or
    // "vla" for a captured VLA size.
    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    auto *Arg =
        ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
                                  ArgType, ImplicitParamDecl::Other);
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  // Finish with the parameters that follow the context parameter.
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  // Second pass over the fields: Cnt indexes the argument that corresponds to
  // the current field, starting at the position of the context parameter.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // Captured VLA size: undo the uintptr cast if one was applied, load the
      // size and record it together with its size expression.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), AlignmentSource::Decl);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      // Capture by reference: for a non-reference variable the argument is
      // loaded (as a reference or as a pointer) to obtain the variable's
      // address.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      // Non-pointer capture by copy: registered unconditionally, with the
      // uintptr cast undone when one was applied.
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var,
            FO.UIntPtrCastRequired
                ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
                                       ArgLVal, VarTy->isReferenceType())
                : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
460 
461 llvm::Function *
462 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
463   assert(
464       CapturedStmtInfo &&
465       "CapturedStmtInfo should be set when generating the captured function");
466   const CapturedDecl *CD = S.getCapturedDecl();
467   // Build the argument list.
468   bool NeedWrapperFunction =
469       getDebugInfo() &&
470       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
471   FunctionArgList Args;
472   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
473   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
474   SmallString<256> Buffer;
475   llvm::raw_svector_ostream Out(Buffer);
476   Out << CapturedStmtInfo->getHelperName();
477   if (NeedWrapperFunction)
478     Out << "_debug__";
479   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
480                      Out.str());
481   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
482                                                    VLASizes, CXXThisValue, FO);
483   for (const auto &LocalAddrPair : LocalAddrs) {
484     if (LocalAddrPair.second.first) {
485       setAddrOfLocalVar(LocalAddrPair.second.first,
486                         LocalAddrPair.second.second);
487     }
488   }
489   for (const auto &VLASizePair : VLASizes)
490     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
491   PGO.assignRegionCounters(GlobalDecl(CD), F);
492   CapturedStmtInfo->EmitBody(*this, CD->getBody());
493   FinishFunction(CD->getBodyRBrace());
494   if (!NeedWrapperFunction)
495     return F;
496 
497   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
498                             /*RegisterCastedArgsOnly=*/true,
499                             CapturedStmtInfo->getHelperName());
500   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
501   Args.clear();
502   LocalAddrs.clear();
503   VLASizes.clear();
504   llvm::Function *WrapperF =
505       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
506                                    WrapperCGF.CXXThisValue, WrapperFO);
507   llvm::SmallVector<llvm::Value *, 4> CallArgs;
508   for (const auto *Arg : Args) {
509     llvm::Value *CallArg;
510     auto I = LocalAddrs.find(Arg);
511     if (I != LocalAddrs.end()) {
512       LValue LV = WrapperCGF.MakeAddrLValue(
513           I->second.second,
514           I->second.first ? I->second.first->getType() : Arg->getType(),
515           AlignmentSource::Decl);
516       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
517     } else {
518       auto EI = VLASizes.find(Arg);
519       if (EI != VLASizes.end())
520         CallArg = EI->second.second;
521       else {
522         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
523                                               Arg->getType(),
524                                               AlignmentSource::Decl);
525         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
526       }
527     }
528     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
529   }
530   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
531                                                   F, CallArgs);
532   WrapperCGF.FinishFunction();
533   return WrapperF;
534 }
535 
536 //===----------------------------------------------------------------------===//
537 //                              OpenMP Directive Emission
538 //===----------------------------------------------------------------------===//
539 void CodeGenFunction::EmitOMPAggregateAssign(
540     Address DestAddr, Address SrcAddr, QualType OriginalType,
541     const llvm::function_ref<void(Address, Address)> &CopyGen) {
542   // Perform element-by-element initialization.
543   QualType ElementTy;
544 
545   // Drill down to the base element type on both arrays.
546   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
547   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
548   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
549 
550   auto SrcBegin = SrcAddr.getPointer();
551   auto DestBegin = DestAddr.getPointer();
552   // Cast from pointer to array type to pointer to single element.
553   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
554   // The basic structure here is a while-do loop.
555   auto BodyBB = createBasicBlock("omp.arraycpy.body");
556   auto DoneBB = createBasicBlock("omp.arraycpy.done");
557   auto IsEmpty =
558       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
559   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
560 
561   // Enter the loop body, making that address the current address.
562   auto EntryBB = Builder.GetInsertBlock();
563   EmitBlock(BodyBB);
564 
565   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
566 
567   llvm::PHINode *SrcElementPHI =
568     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
569   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
570   Address SrcElementCurrent =
571       Address(SrcElementPHI,
572               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
573 
574   llvm::PHINode *DestElementPHI =
575     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
576   DestElementPHI->addIncoming(DestBegin, EntryBB);
577   Address DestElementCurrent =
578     Address(DestElementPHI,
579             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
580 
581   // Emit copy.
582   CopyGen(DestElementCurrent, SrcElementCurrent);
583 
584   // Shift the address forward by one element.
585   auto DestElementNext = Builder.CreateConstGEP1_32(
586       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
587   auto SrcElementNext = Builder.CreateConstGEP1_32(
588       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
589   // Check whether we've reached the end.
590   auto Done =
591       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
592   Builder.CreateCondBr(Done, DoneBB, BodyBB);
593   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
594   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
595 
596   // Done.
597   EmitBlock(DoneBB, /*IsFinished=*/true);
598 }
599 
600 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
601                                   Address SrcAddr, const VarDecl *DestVD,
602                                   const VarDecl *SrcVD, const Expr *Copy) {
603   if (OriginalType->isArrayType()) {
604     auto *BO = dyn_cast<BinaryOperator>(Copy);
605     if (BO && BO->getOpcode() == BO_Assign) {
606       // Perform simple memcpy for simple copying.
607       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
608     } else {
609       // For arrays with complex element types perform element by element
610       // copying.
611       EmitOMPAggregateAssign(
612           DestAddr, SrcAddr, OriginalType,
613           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
614             // Working with the single array element, so have to remap
615             // destination and source variables to corresponding array
616             // elements.
617             CodeGenFunction::OMPPrivateScope Remap(*this);
618             Remap.addPrivate(DestVD, [DestElement]() -> Address {
619               return DestElement;
620             });
621             Remap.addPrivate(
622                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
623             (void)Remap.Privatize();
624             EmitIgnoredExpr(Copy);
625           });
626     }
627   } else {
628     // Remap pseudo source variable to private copy.
629     CodeGenFunction::OMPPrivateScope Remap(*this);
630     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
631     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
632     (void)Remap.Privatize();
633     // Emit copying of the whole variable.
634     EmitIgnoredExpr(Copy);
635   }
636 }
637 
638 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
639                                                 OMPPrivateScope &PrivateScope) {
640   if (!HaveInsertPoint())
641     return false;
642   bool FirstprivateIsLastprivate = false;
643   llvm::DenseSet<const VarDecl *> Lastprivates;
644   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
645     for (const auto *D : C->varlists())
646       Lastprivates.insert(
647           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
648   }
649   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
650   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
651   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
652     auto IRef = C->varlist_begin();
653     auto InitsRef = C->inits().begin();
654     for (auto IInit : C->private_copies()) {
655       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
656       bool ThisFirstprivateIsLastprivate =
657           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
658       auto *CapFD = CapturesInfo.lookup(OrigVD);
659       auto *FD = CapturedStmtInfo->lookup(OrigVD);
660       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
661           !FD->getType()->isReferenceType()) {
662         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
663         ++IRef;
664         ++InitsRef;
665         continue;
666       }
667       FirstprivateIsLastprivate =
668           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
669       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
670         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
671         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
672         bool IsRegistered;
673         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
674                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
675                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
676         Address OriginalAddr = EmitLValue(&DRE).getAddress();
677         QualType Type = VD->getType();
678         if (Type->isArrayType()) {
679           // Emit VarDecl with copy init for arrays.
680           // Get the address of the original variable captured in current
681           // captured region.
682           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
683             auto Emission = EmitAutoVarAlloca(*VD);
684             auto *Init = VD->getInit();
685             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
686               // Perform simple memcpy.
687               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
688                                   Type);
689             } else {
690               EmitOMPAggregateAssign(
691                   Emission.getAllocatedAddress(), OriginalAddr, Type,
692                   [this, VDInit, Init](Address DestElement,
693                                        Address SrcElement) {
694                     // Clean up any temporaries needed by the initialization.
695                     RunCleanupsScope InitScope(*this);
696                     // Emit initialization for single element.
697                     setAddrOfLocalVar(VDInit, SrcElement);
698                     EmitAnyExprToMem(Init, DestElement,
699                                      Init->getType().getQualifiers(),
700                                      /*IsInitializer*/ false);
701                     LocalDeclMap.erase(VDInit);
702                   });
703             }
704             EmitAutoVarCleanups(Emission);
705             return Emission.getAllocatedAddress();
706           });
707         } else {
708           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
709             // Emit private VarDecl with copy init.
710             // Remap temp VDInit variable to the address of the original
711             // variable
712             // (for proper handling of captured global variables).
713             setAddrOfLocalVar(VDInit, OriginalAddr);
714             EmitDecl(*VD);
715             LocalDeclMap.erase(VDInit);
716             return GetAddrOfLocalVar(VD);
717           });
718         }
719         assert(IsRegistered &&
720                "firstprivate var already registered as private");
721         // Silence the warning about unused variable.
722         (void)IsRegistered;
723       }
724       ++IRef;
725       ++InitsRef;
726     }
727   }
728   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
729 }
730 
731 void CodeGenFunction::EmitOMPPrivateClause(
732     const OMPExecutableDirective &D,
733     CodeGenFunction::OMPPrivateScope &PrivateScope) {
734   if (!HaveInsertPoint())
735     return;
736   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
737   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
738     auto IRef = C->varlist_begin();
739     for (auto IInit : C->private_copies()) {
740       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
741       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
742         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
743         bool IsRegistered =
744             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
745               // Emit private VarDecl with copy init.
746               EmitDecl(*VD);
747               return GetAddrOfLocalVar(VD);
748             });
749         assert(IsRegistered && "private var already registered as private");
750         // Silence the warning about unused variable.
751         (void)IsRegistered;
752       }
753       ++IRef;
754     }
755   }
756 }
757 
758 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
759   if (!HaveInsertPoint())
760     return false;
761   // threadprivate_var1 = master_threadprivate_var1;
762   // operator=(threadprivate_var2, master_threadprivate_var2);
763   // ...
764   // __kmpc_barrier(&loc, global_tid);
765   llvm::DenseSet<const VarDecl *> CopiedVars;
766   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
767   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
768     auto IRef = C->varlist_begin();
769     auto ISrcRef = C->source_exprs().begin();
770     auto IDestRef = C->destination_exprs().begin();
771     for (auto *AssignOp : C->assignment_ops()) {
772       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
773       QualType Type = VD->getType();
774       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
775         // Get the address of the master variable. If we are emitting code with
776         // TLS support, the address is passed from the master as field in the
777         // captured declaration.
778         Address MasterAddr = Address::invalid();
779         if (getLangOpts().OpenMPUseTLS &&
780             getContext().getTargetInfo().isTLSSupported()) {
781           assert(CapturedStmtInfo->lookup(VD) &&
782                  "Copyin threadprivates should have been captured!");
783           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
784                           VK_LValue, (*IRef)->getExprLoc());
785           MasterAddr = EmitLValue(&DRE).getAddress();
786           LocalDeclMap.erase(VD);
787         } else {
788           MasterAddr =
789             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
790                                         : CGM.GetAddrOfGlobal(VD),
791                     getContext().getDeclAlign(VD));
792         }
793         // Get the address of the threadprivate variable.
794         Address PrivateAddr = EmitLValue(*IRef).getAddress();
795         if (CopiedVars.size() == 1) {
796           // At first check if current thread is a master thread. If it is, no
797           // need to copy data.
798           CopyBegin = createBasicBlock("copyin.not.master");
799           CopyEnd = createBasicBlock("copyin.not.master.end");
800           Builder.CreateCondBr(
801               Builder.CreateICmpNE(
802                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
803                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
804               CopyBegin, CopyEnd);
805           EmitBlock(CopyBegin);
806         }
807         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
808         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
809         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
810       }
811       ++IRef;
812       ++ISrcRef;
813       ++IDestRef;
814     }
815   }
816   if (CopyEnd) {
817     // Exit out of copying procedure for non-master thread.
818     EmitBlock(CopyEnd, /*IsFinished=*/true);
819     return true;
820   }
821   return false;
822 }
823 
824 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
825     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
826   if (!HaveInsertPoint())
827     return false;
828   bool HasAtLeastOneLastprivate = false;
829   llvm::DenseSet<const VarDecl *> SIMDLCVs;
830   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
831     auto *LoopDirective = cast<OMPLoopDirective>(&D);
832     for (auto *C : LoopDirective->counters()) {
833       SIMDLCVs.insert(
834           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
835     }
836   }
837   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
838   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
839     HasAtLeastOneLastprivate = true;
840     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
841       break;
842     auto IRef = C->varlist_begin();
843     auto IDestRef = C->destination_exprs().begin();
844     for (auto *IInit : C->private_copies()) {
845       // Keep the address of the original variable for future update at the end
846       // of the loop.
847       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
848       // Taskloops do not require additional initialization, it is done in
849       // runtime support library.
850       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
851         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
852         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
853           DeclRefExpr DRE(
854               const_cast<VarDecl *>(OrigVD),
855               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
856                   OrigVD) != nullptr,
857               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
858           return EmitLValue(&DRE).getAddress();
859         });
860         // Check if the variable is also a firstprivate: in this case IInit is
861         // not generated. Initialization of this variable will happen in codegen
862         // for 'firstprivate' clause.
863         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
864           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
865           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
866             // Emit private VarDecl with copy init.
867             EmitDecl(*VD);
868             return GetAddrOfLocalVar(VD);
869           });
870           assert(IsRegistered &&
871                  "lastprivate var already registered as private");
872           (void)IsRegistered;
873         }
874       }
875       ++IRef;
876       ++IDestRef;
877     }
878   }
879   return HasAtLeastOneLastprivate;
880 }
881 
882 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
883     const OMPExecutableDirective &D, bool NoFinals,
884     llvm::Value *IsLastIterCond) {
885   if (!HaveInsertPoint())
886     return;
887   // Emit following code:
888   // if (<IsLastIterCond>) {
889   //   orig_var1 = private_orig_var1;
890   //   ...
891   //   orig_varn = private_orig_varn;
892   // }
893   llvm::BasicBlock *ThenBB = nullptr;
894   llvm::BasicBlock *DoneBB = nullptr;
895   if (IsLastIterCond) {
896     ThenBB = createBasicBlock(".omp.lastprivate.then");
897     DoneBB = createBasicBlock(".omp.lastprivate.done");
898     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
899     EmitBlock(ThenBB);
900   }
901   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
902   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
903   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
904     auto IC = LoopDirective->counters().begin();
905     for (auto F : LoopDirective->finals()) {
906       auto *D =
907           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
908       if (NoFinals)
909         AlreadyEmittedVars.insert(D);
910       else
911         LoopCountersAndUpdates[D] = F;
912       ++IC;
913     }
914   }
915   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
916     auto IRef = C->varlist_begin();
917     auto ISrcRef = C->source_exprs().begin();
918     auto IDestRef = C->destination_exprs().begin();
919     for (auto *AssignOp : C->assignment_ops()) {
920       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
921       QualType Type = PrivateVD->getType();
922       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
923       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
924         // If lastprivate variable is a loop control variable for loop-based
925         // directive, update its value before copyin back to original
926         // variable.
927         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
928           EmitIgnoredExpr(FinalExpr);
929         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
930         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
931         // Get the address of the original variable.
932         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
933         // Get the address of the private variable.
934         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
935         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
936           PrivateAddr =
937               Address(Builder.CreateLoad(PrivateAddr),
938                       getNaturalTypeAlignment(RefTy->getPointeeType()));
939         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
940       }
941       ++IRef;
942       ++ISrcRef;
943       ++IDestRef;
944     }
945     if (auto *PostUpdate = C->getPostUpdateExpr())
946       EmitIgnoredExpr(PostUpdate);
947   }
948   if (IsLastIterCond)
949     EmitBlock(DoneBB, /*IsFinished=*/true);
950 }
951 
/// Emit the private copies for all 'reduction' clauses of \p D and register
/// them, together with the LHS/RHS helper variables used by the reduction
/// operations, in \p PrivateScope.
///
/// The function works in two phases: it first flattens the per-clause lists
/// into parallel vectors, then walks those vectors once, emitting and
/// registering one reduction item per step. Nothing is emitted when there is
/// no current insertion point.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Phase 1: gather the items of every reduction clause. All five vectors are
  // filled in lockstep, so index i describes the same item in each of them.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  // Phase 2: emit each item. Count is the item index passed to RedCG; it is
  // advanced together with the three iterators at the bottom of the loop.
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    // The callback passed here emits the variable's own initializer; whether
    // and when it is invoked is decided by RedCG.emitInitialization.
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    // Register the (adjusted) private address under the base declaration
    // reported by RedCG for this item.
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    // Map the helper variables: LHSVD to the shared (original) storage and
    // RHSVD to the private copy. The three branches below differ only in
    // whether an element bitcast is applied to those addresses.
    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Array section with a variably modified private type: both addresses
      // are used as they are.
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Scalar-typed array section or a single array element: the private
      // address is bitcast to RHSVD's memory type.
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      // Everything else. For array-typed privates both sides are bitcast to
      // the helper variables' memory types ("lhs.begin" / "rhs.begin").
      // NOTE(review): this inner Type shadows the identical outer Type
      // declared above; both hold PrivateVD->getType().
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1057 
1058 void CodeGenFunction::EmitOMPReductionClauseFinal(
1059     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1060   if (!HaveInsertPoint())
1061     return;
1062   llvm::SmallVector<const Expr *, 8> Privates;
1063   llvm::SmallVector<const Expr *, 8> LHSExprs;
1064   llvm::SmallVector<const Expr *, 8> RHSExprs;
1065   llvm::SmallVector<const Expr *, 8> ReductionOps;
1066   bool HasAtLeastOneReduction = false;
1067   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1068     HasAtLeastOneReduction = true;
1069     Privates.append(C->privates().begin(), C->privates().end());
1070     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1071     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1072     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1073   }
1074   if (HasAtLeastOneReduction) {
1075     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1076                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1077                       D.getDirectiveKind() == OMPD_simd;
1078     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1079     // Emit nowait reduction if nowait clause is present or directive is a
1080     // parallel directive (it always has implicit barrier).
1081     CGM.getOpenMPRuntime().emitReduction(
1082         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1083         {WithNowait, SimpleReduction, ReductionKind});
1084   }
1085 }
1086 
1087 static void emitPostUpdateForReductionClause(
1088     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1089     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1090   if (!CGF.HaveInsertPoint())
1091     return;
1092   llvm::BasicBlock *DoneBB = nullptr;
1093   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1094     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1095       if (!DoneBB) {
1096         if (auto *Cond = CondGen(CGF)) {
1097           // If the first post-update expression is found, emit conditional
1098           // block if it was requested.
1099           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1100           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1101           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1102           CGF.EmitBlock(ThenBB);
1103         }
1104       }
1105       CGF.EmitIgnoredExpr(PostUpdate);
1106     }
1107   }
1108   if (DoneBB)
1109     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1110 }
1111 
1112 namespace {
1113 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1114 /// parallel function. This is necessary for combined constructs such as
1115 /// 'distribute parallel for'
1116 typedef llvm::function_ref<void(CodeGenFunction &,
1117                                 const OMPExecutableDirective &,
1118                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1119     CodeGenBoundParametersTy;
1120 } // anonymous namespace
1121 
1122 static void emitCommonOMPParallelDirective(
1123     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1124     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1125     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1126   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1127   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1128       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1129   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1130     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1131     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1132                                          /*IgnoreResultAssign*/ true);
1133     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1134         CGF, NumThreads, NumThreadsClause->getLocStart());
1135   }
1136   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1137     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1138     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1139         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1140   }
1141   const Expr *IfCond = nullptr;
1142   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1143     if (C->getNameModifier() == OMPD_unknown ||
1144         C->getNameModifier() == OMPD_parallel) {
1145       IfCond = C->getCondition();
1146       break;
1147     }
1148   }
1149 
1150   OMPParallelScope Scope(CGF, S);
1151   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1152   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1153   // lower and upper bounds with the pragma 'for' chunking mechanism.
1154   // The following lambda takes care of appending the lower and upper bound
1155   // parameters when necessary
1156   CodeGenBoundParameters(CGF, S, CapturedVars);
1157   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1158   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1159                                               CapturedVars, IfCond);
1160 }
1161 
1162 static void emitEmptyBoundParameters(CodeGenFunction &,
1163                                      const OMPExecutableDirective &,
1164                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1165 
1166 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1167   // Emit parallel region as a standalone region.
1168   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1169     OMPPrivateScope PrivateScope(CGF);
1170     bool Copyins = CGF.EmitOMPCopyinClause(S);
1171     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1172     if (Copyins) {
1173       // Emit implicit barrier to synchronize threads and avoid data races on
1174       // propagation master's thread values of threadprivate variables to local
1175       // instances of that variables of all other implicit threads.
1176       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1177           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1178           /*ForceSimpleCall=*/true);
1179     }
1180     CGF.EmitOMPPrivateClause(S, PrivateScope);
1181     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1182     (void)PrivateScope.Privatize();
1183     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1184     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1185   };
1186   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1187                                  emitEmptyBoundParameters);
1188   emitPostUpdateForReductionClause(
1189       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1190 }
1191 
1192 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1193                                       JumpDest LoopExit) {
1194   RunCleanupsScope BodyScope(*this);
1195   // Update counters values on current iteration.
1196   for (auto I : D.updates()) {
1197     EmitIgnoredExpr(I);
1198   }
1199   // Update the linear variables.
1200   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1201     for (auto *U : C->updates())
1202       EmitIgnoredExpr(U);
1203   }
1204 
1205   // On a continue in the body, jump to the end.
1206   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1207   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1208   // Emit loop body.
1209   EmitStmt(D.getBody());
1210   // The end (updates/cleanups).
1211   EmitBlock(Continue.getBlock());
1212   BreakContinueStack.pop_back();
1213 }
1214 
/// Emit the skeleton of an OpenMP inner loop:
/// \code
///   omp.inner.for.cond:   branch on LoopCond to body or exit
///   [omp.inner.for.cond.cleanup:  branch through cleanups to the exit]
///   omp.inner.for.body:   BodyGen
///   omp.inner.for.inc:    IncExpr; PostIncGen; branch back to cond
///   omp.inner.for.end:
/// \endcode
///
/// \param S statement used for the loop's source range (debug locations) and
/// for profile counters.
/// \param RequiresCleanup if true, the loop exit is staged through a separate
/// cleanup block instead of branching directly to the exit block.
/// \param LoopCond loop continuation condition.
/// \param IncExpr expression emitted in the increment block.
/// \param BodyGen callback that emits the loop body.
/// \param PostIncGen callback invoked right after \p IncExpr has been emitted.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  // Destination for leaving the loop; its block is emitted last.
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  // Register the loop, with the debug locations of S's source range, on the
  // loop stack; it is popped again after the back-edge has been emitted.
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  // Only true when a staging block was created above: emit it and leave the
  // loop through the pending cleanups.
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  // While the body is emitted, 'break' targets LoopExit and 'continue'
  // targets the increment block.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1263 
1264 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1265   if (!HaveInsertPoint())
1266     return false;
1267   // Emit inits for the linear variables.
1268   bool HasLinears = false;
1269   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1270     for (auto *Init : C->inits()) {
1271       HasLinears = true;
1272       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1273       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1274         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1275         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1276         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1277                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1278                         VD->getInit()->getType(), VK_LValue,
1279                         VD->getInit()->getExprLoc());
1280         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1281                                                 VD->getType()),
1282                        /*capturedByInit=*/false);
1283         EmitAutoVarCleanups(Emission);
1284       } else
1285         EmitVarDecl(*VD);
1286     }
1287     // Emit the linear steps for the linear clauses.
1288     // If a step is not constant, it is pre-calculated before the loop.
1289     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1290       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1291         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1292         // Emit calculation of the linear step.
1293         EmitIgnoredExpr(CS);
1294       }
1295   }
1296   return HasLinears;
1297 }
1298 
1299 void CodeGenFunction::EmitOMPLinearClauseFinal(
1300     const OMPLoopDirective &D,
1301     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1302   if (!HaveInsertPoint())
1303     return;
1304   llvm::BasicBlock *DoneBB = nullptr;
1305   // Emit the final values of the linear variables.
1306   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1307     auto IC = C->varlist_begin();
1308     for (auto *F : C->finals()) {
1309       if (!DoneBB) {
1310         if (auto *Cond = CondGen(*this)) {
1311           // If the first post-update expression is found, emit conditional
1312           // block if it was requested.
1313           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1314           DoneBB = createBasicBlock(".omp.linear.pu.done");
1315           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1316           EmitBlock(ThenBB);
1317         }
1318       }
1319       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1320       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1321                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1322                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1323       Address OrigAddr = EmitLValue(&DRE).getAddress();
1324       CodeGenFunction::OMPPrivateScope VarScope(*this);
1325       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1326       (void)VarScope.Privatize();
1327       EmitIgnoredExpr(F);
1328       ++IC;
1329     }
1330     if (auto *PostUpdate = C->getPostUpdateExpr())
1331       EmitIgnoredExpr(PostUpdate);
1332   }
1333   if (DoneBB)
1334     EmitBlock(DoneBB, /*IsFinished=*/true);
1335 }
1336 
1337 static void emitAlignedClause(CodeGenFunction &CGF,
1338                               const OMPExecutableDirective &D) {
1339   if (!CGF.HaveInsertPoint())
1340     return;
1341   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1342     unsigned ClauseAlignment = 0;
1343     if (auto AlignmentExpr = Clause->getAlignment()) {
1344       auto AlignmentCI =
1345           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1346       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1347     }
1348     for (auto E : Clause->varlists()) {
1349       unsigned Alignment = ClauseAlignment;
1350       if (Alignment == 0) {
1351         // OpenMP [2.8.1, Description]
1352         // If no optional parameter is specified, implementation-defined default
1353         // alignments for SIMD instructions on the target platforms are assumed.
1354         Alignment =
1355             CGF.getContext()
1356                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1357                     E->getType()->getPointeeType()))
1358                 .getQuantity();
1359       }
1360       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1361              "alignment is not power of 2");
1362       if (Alignment != 0) {
1363         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1364         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1365       }
1366     }
1367   }
1368 }
1369 
1370 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1371     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1372   if (!HaveInsertPoint())
1373     return;
1374   auto I = S.private_counters().begin();
1375   for (auto *E : S.counters()) {
1376     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1377     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1378     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1379       // Emit var without initialization.
1380       if (!LocalDeclMap.count(PrivateVD)) {
1381         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1382         EmitAutoVarCleanups(VarEmission);
1383       }
1384       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1385                       /*RefersToEnclosingVariableOrCapture=*/false,
1386                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1387       return EmitLValue(&DRE).getAddress();
1388     });
1389     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1390         VD->hasGlobalStorage()) {
1391       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1392         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1393                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1394                         E->getType(), VK_LValue, E->getExprLoc());
1395         return EmitLValue(&DRE).getAddress();
1396       });
1397     }
1398     ++I;
1399   }
1400 }
1401 
1402 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1403                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1404                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1405   if (!CGF.HaveInsertPoint())
1406     return;
1407   {
1408     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1409     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1410     (void)PreCondScope.Privatize();
1411     // Get initial values of real counters.
1412     for (auto I : S.inits()) {
1413       CGF.EmitIgnoredExpr(I);
1414     }
1415   }
1416   // Check that loop is executed at least one time.
1417   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1418 }
1419 
1420 void CodeGenFunction::EmitOMPLinearClause(
1421     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1422   if (!HaveInsertPoint())
1423     return;
1424   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1425   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1426     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1427     for (auto *C : LoopDirective->counters()) {
1428       SIMDLCVs.insert(
1429           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1430     }
1431   }
1432   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1433     auto CurPrivate = C->privates().begin();
1434     for (auto *E : C->varlists()) {
1435       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1436       auto *PrivateVD =
1437           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1438       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1439         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1440           // Emit private VarDecl with copy init.
1441           EmitVarDecl(*PrivateVD);
1442           return GetAddrOfLocalVar(PrivateVD);
1443         });
1444         assert(IsRegistered && "linear var already registered as private");
1445         // Silence the warning about unused variable.
1446         (void)IsRegistered;
1447       } else
1448         EmitVarDecl(*PrivateVD);
1449       ++CurPrivate;
1450     }
1451   }
1452 }
1453 
1454 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1455                                      const OMPExecutableDirective &D,
1456                                      bool IsMonotonic) {
1457   if (!CGF.HaveInsertPoint())
1458     return;
1459   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1460     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1461                                  /*ignoreResult=*/true);
1462     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1463     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1464     // In presence of finite 'safelen', it may be unsafe to mark all
1465     // the memory instructions parallel, because loop-carried
1466     // dependences of 'safelen' iterations are possible.
1467     if (!IsMonotonic)
1468       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1469   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1470     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1471                                  /*ignoreResult=*/true);
1472     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1473     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1474     // In presence of finite 'safelen', it may be unsafe to mark all
1475     // the memory instructions parallel, because loop-carried
1476     // dependences of 'safelen' iterations are possible.
1477     CGF.LoopStack.setParallel(false);
1478   }
1479 }
1480 
1481 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1482                                       bool IsMonotonic) {
1483   // Walk clauses and process safelen/lastprivate.
1484   LoopStack.setParallel(!IsMonotonic);
1485   LoopStack.setVectorizeEnable(true);
1486   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1487 }
1488 
1489 void CodeGenFunction::EmitOMPSimdFinal(
1490     const OMPLoopDirective &D,
1491     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1492   if (!HaveInsertPoint())
1493     return;
1494   llvm::BasicBlock *DoneBB = nullptr;
1495   auto IC = D.counters().begin();
1496   auto IPC = D.private_counters().begin();
1497   for (auto F : D.finals()) {
1498     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1499     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1500     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1501     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1502         OrigVD->hasGlobalStorage() || CED) {
1503       if (!DoneBB) {
1504         if (auto *Cond = CondGen(*this)) {
1505           // If the first post-update expression is found, emit conditional
1506           // block if it was requested.
1507           auto *ThenBB = createBasicBlock(".omp.final.then");
1508           DoneBB = createBasicBlock(".omp.final.done");
1509           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1510           EmitBlock(ThenBB);
1511         }
1512       }
1513       Address OrigAddr = Address::invalid();
1514       if (CED)
1515         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1516       else {
1517         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1518                         /*RefersToEnclosingVariableOrCapture=*/false,
1519                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1520         OrigAddr = EmitLValue(&DRE).getAddress();
1521       }
1522       OMPPrivateScope VarScope(*this);
1523       VarScope.addPrivate(OrigVD,
1524                           [OrigAddr]() -> Address { return OrigAddr; });
1525       (void)VarScope.Privatize();
1526       EmitIgnoredExpr(F);
1527     }
1528     ++IC;
1529     ++IPC;
1530   }
1531   if (DoneBB)
1532     EmitBlock(DoneBB, /*IsFinished=*/true);
1533 }
1534 
1535 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1536                                          const OMPLoopDirective &S,
1537                                          CodeGenFunction::JumpDest LoopExit) {
1538   CGF.EmitOMPLoopBody(S, LoopExit);
1539   CGF.EmitStopPoint(&S);
1540 }
1541 
1542 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
1543                               PrePostActionTy &Action) {
1544   Action.Enter(CGF);
1545   assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
1546          "Expected simd directive");
1547   OMPLoopScope PreInitScope(CGF, S);
1548   // if (PreCond) {
1549   //   for (IV in 0..LastIteration) BODY;
1550   //   <Final counter/linear vars updates>;
1551   // }
1552   //
1553 
1554   // Emit: if (PreCond) - begin.
1555   // If the condition constant folds and can be elided, avoid emitting the
1556   // whole loop.
1557   bool CondConstant;
1558   llvm::BasicBlock *ContBlock = nullptr;
1559   if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1560     if (!CondConstant)
1561       return;
1562   } else {
1563     auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1564     ContBlock = CGF.createBasicBlock("simd.if.end");
1565     emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1566                 CGF.getProfileCount(&S));
1567     CGF.EmitBlock(ThenBlock);
1568     CGF.incrementProfileCounter(&S);
1569   }
1570 
1571   // Emit the loop iteration variable.
1572   const Expr *IVExpr = S.getIterationVariable();
1573   const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1574   CGF.EmitVarDecl(*IVDecl);
1575   CGF.EmitIgnoredExpr(S.getInit());
1576 
1577   // Emit the iterations count variable.
1578   // If it is not a variable, Sema decided to calculate iterations count on
1579   // each iteration (e.g., it is foldable into a constant).
1580   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1581     CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1582     // Emit calculation of the iterations count.
1583     CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1584   }
1585 
1586   CGF.EmitOMPSimdInit(S);
1587 
1588   emitAlignedClause(CGF, S);
1589   (void)CGF.EmitOMPLinearClauseInit(S);
1590   {
1591     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
1592     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1593     CGF.EmitOMPLinearClause(S, LoopScope);
1594     CGF.EmitOMPPrivateClause(S, LoopScope);
1595     CGF.EmitOMPReductionClauseInit(S, LoopScope);
1596     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1597     (void)LoopScope.Privatize();
1598     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1599                          S.getInc(),
1600                          [&S](CodeGenFunction &CGF) {
1601                            CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
1602                            CGF.EmitStopPoint(&S);
1603                          },
1604                          [](CodeGenFunction &) {});
1605     CGF.EmitOMPSimdFinal(
1606         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1607     // Emit final copy of the lastprivate variables at the end of loops.
1608     if (HasLastprivateClause)
1609       CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1610     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1611     emitPostUpdateForReductionClause(
1612         CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1613   }
1614   CGF.EmitOMPLinearClauseFinal(
1615       S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1616   // Emit: if (PreCond) - end.
1617   if (ContBlock) {
1618     CGF.EmitBranch(ContBlock);
1619     CGF.EmitBlock(ContBlock, true);
1620   }
1621 }
1622 
1623 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1624   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1625     emitOMPSimdRegion(CGF, S, Action);
1626   };
1627   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1628   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1629 }
1630 
1631 void CodeGenFunction::EmitOMPOuterLoop(
1632     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1633     CodeGenFunction::OMPPrivateScope &LoopScope,
1634     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1635     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1636     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1637   auto &RT = CGM.getOpenMPRuntime();
1638 
1639   const Expr *IVExpr = S.getIterationVariable();
1640   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1641   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1642 
1643   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1644 
1645   // Start the loop with a block that tests the condition.
1646   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1647   EmitBlock(CondBlock);
1648   const SourceRange &R = S.getSourceRange();
1649   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1650                  SourceLocToDebugLoc(R.getEnd()));
1651 
1652   llvm::Value *BoolCondVal = nullptr;
1653   if (!DynamicOrOrdered) {
1654     // UB = min(UB, GlobalUB) or
1655     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1656     // 'distribute parallel for')
1657     EmitIgnoredExpr(LoopArgs.EUB);
1658     // IV = LB
1659     EmitIgnoredExpr(LoopArgs.Init);
1660     // IV < UB
1661     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1662   } else {
1663     BoolCondVal =
1664         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1665                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1666   }
1667 
1668   // If there are any cleanups between here and the loop-exit scope,
1669   // create a block to stage a loop exit along.
1670   auto ExitBlock = LoopExit.getBlock();
1671   if (LoopScope.requiresCleanups())
1672     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1673 
1674   auto LoopBody = createBasicBlock("omp.dispatch.body");
1675   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1676   if (ExitBlock != LoopExit.getBlock()) {
1677     EmitBlock(ExitBlock);
1678     EmitBranchThroughCleanup(LoopExit);
1679   }
1680   EmitBlock(LoopBody);
1681 
1682   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1683   // LB for loop condition and emitted it above).
1684   if (DynamicOrOrdered)
1685     EmitIgnoredExpr(LoopArgs.Init);
1686 
1687   // Create a block for the increment.
1688   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1689   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1690 
1691   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1692   // with dynamic/guided scheduling and without ordered clause.
1693   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1694     LoopStack.setParallel(!IsMonotonic);
1695   else
1696     EmitOMPSimdInit(S, IsMonotonic);
1697 
1698   SourceLocation Loc = S.getLocStart();
1699 
1700   // when 'distribute' is not combined with a 'for':
1701   // while (idx <= UB) { BODY; ++idx; }
1702   // when 'distribute' is combined with a 'for'
1703   // (e.g. 'distribute parallel for')
1704   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1705   EmitOMPInnerLoop(
1706       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1707       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1708         CodeGenLoop(CGF, S, LoopExit);
1709       },
1710       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1711         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1712       });
1713 
1714   EmitBlock(Continue.getBlock());
1715   BreakContinueStack.pop_back();
1716   if (!DynamicOrOrdered) {
1717     // Emit "LB = LB + Stride", "UB = UB + Stride".
1718     EmitIgnoredExpr(LoopArgs.NextLB);
1719     EmitIgnoredExpr(LoopArgs.NextUB);
1720   }
1721 
1722   EmitBranch(CondBlock);
1723   LoopStack.pop();
1724   // Emit the fall-through block.
1725   EmitBlock(LoopExit.getBlock());
1726 
1727   // Tell the runtime we are done.
1728   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1729     if (!DynamicOrOrdered)
1730       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1731                                                      S.getDirectiveKind());
1732   };
1733   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1734 }
1735 
1736 void CodeGenFunction::EmitOMPForOuterLoop(
1737     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1738     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1739     const OMPLoopArguments &LoopArgs,
1740     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1741   auto &RT = CGM.getOpenMPRuntime();
1742 
1743   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1744   const bool DynamicOrOrdered =
1745       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1746 
1747   assert((Ordered ||
1748           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1749                                  LoopArgs.Chunk != nullptr)) &&
1750          "static non-chunked schedule does not need outer loop");
1751 
1752   // Emit outer loop.
1753   //
1754   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1755   // When schedule(dynamic,chunk_size) is specified, the iterations are
1756   // distributed to threads in the team in chunks as the threads request them.
1757   // Each thread executes a chunk of iterations, then requests another chunk,
1758   // until no chunks remain to be distributed. Each chunk contains chunk_size
1759   // iterations, except for the last chunk to be distributed, which may have
1760   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1761   //
1762   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1763   // to threads in the team in chunks as the executing threads request them.
1764   // Each thread executes a chunk of iterations, then requests another chunk,
1765   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1766   // each chunk is proportional to the number of unassigned iterations divided
1767   // by the number of threads in the team, decreasing to 1. For a chunk_size
1768   // with value k (greater than 1), the size of each chunk is determined in the
1769   // same way, with the restriction that the chunks do not contain fewer than k
1770   // iterations (except for the last chunk to be assigned, which may have fewer
1771   // than k iterations).
1772   //
1773   // When schedule(auto) is specified, the decision regarding scheduling is
1774   // delegated to the compiler and/or runtime system. The programmer gives the
1775   // implementation the freedom to choose any possible mapping of iterations to
1776   // threads in the team.
1777   //
1778   // When schedule(runtime) is specified, the decision regarding scheduling is
1779   // deferred until run time, and the schedule and chunk size are taken from the
1780   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1781   // implementation defined
1782   //
1783   // while(__kmpc_dispatch_next(&LB, &UB)) {
1784   //   idx = LB;
1785   //   while (idx <= UB) { BODY; ++idx;
1786   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1787   //   } // inner loop
1788   // }
1789   //
1790   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1791   // When schedule(static, chunk_size) is specified, iterations are divided into
1792   // chunks of size chunk_size, and the chunks are assigned to the threads in
1793   // the team in a round-robin fashion in the order of the thread number.
1794   //
1795   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1796   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1797   //   LB = LB + ST;
1798   //   UB = UB + ST;
1799   // }
1800   //
1801 
1802   const Expr *IVExpr = S.getIterationVariable();
1803   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1804   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1805 
1806   if (DynamicOrOrdered) {
1807     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1808     llvm::Value *LBVal = DispatchBounds.first;
1809     llvm::Value *UBVal = DispatchBounds.second;
1810     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1811                                                              LoopArgs.Chunk};
1812     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1813                            IVSigned, Ordered, DipatchRTInputValues);
1814   } else {
1815     CGOpenMPRuntime::StaticRTInput StaticInit(
1816         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1817         LoopArgs.ST, LoopArgs.Chunk);
1818     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1819                          ScheduleKind, StaticInit);
1820   }
1821 
1822   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1823                                     const unsigned IVSize,
1824                                     const bool IVSigned) {
1825     if (Ordered) {
1826       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1827                                                             IVSigned);
1828     }
1829   };
1830 
1831   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1832                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1833   OuterLoopArgs.IncExpr = S.getInc();
1834   OuterLoopArgs.Init = S.getInit();
1835   OuterLoopArgs.Cond = S.getCond();
1836   OuterLoopArgs.NextLB = S.getNextLowerBound();
1837   OuterLoopArgs.NextUB = S.getNextUpperBound();
1838   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1839                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1840 }
1841 
1842 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1843                              const unsigned IVSize, const bool IVSigned) {}
1844 
1845 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1846     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1847     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1848     const CodeGenLoopTy &CodeGenLoopContent) {
1849 
1850   auto &RT = CGM.getOpenMPRuntime();
1851 
1852   // Emit outer loop.
1853   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1854   // dynamic
1855   //
1856 
1857   const Expr *IVExpr = S.getIterationVariable();
1858   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1859   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1860 
1861   CGOpenMPRuntime::StaticRTInput StaticInit(
1862       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1863       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1864   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1865 
1866   // for combined 'distribute' and 'for' the increment expression of distribute
1867   // is store in DistInc. For 'distribute' alone, it is in Inc.
1868   Expr *IncExpr;
1869   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1870     IncExpr = S.getDistInc();
1871   else
1872     IncExpr = S.getInc();
1873 
1874   // this routine is shared by 'omp distribute parallel for' and
1875   // 'omp distribute': select the right EUB expression depending on the
1876   // directive
1877   OMPLoopArguments OuterLoopArgs;
1878   OuterLoopArgs.LB = LoopArgs.LB;
1879   OuterLoopArgs.UB = LoopArgs.UB;
1880   OuterLoopArgs.ST = LoopArgs.ST;
1881   OuterLoopArgs.IL = LoopArgs.IL;
1882   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1883   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1884                           ? S.getCombinedEnsureUpperBound()
1885                           : S.getEnsureUpperBound();
1886   OuterLoopArgs.IncExpr = IncExpr;
1887   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1888                            ? S.getCombinedInit()
1889                            : S.getInit();
1890   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1891                            ? S.getCombinedCond()
1892                            : S.getCond();
1893   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1894                              ? S.getCombinedNextLowerBound()
1895                              : S.getNextLowerBound();
1896   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1897                              ? S.getCombinedNextUpperBound()
1898                              : S.getNextUpperBound();
1899 
1900   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1901                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1902                    emitEmptyOrdered);
1903 }
1904 
1905 /// Emit a helper variable and return corresponding lvalue.
1906 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1907                                const DeclRefExpr *Helper) {
1908   auto VDecl = cast<VarDecl>(Helper->getDecl());
1909   CGF.EmitVarDecl(*VDecl);
1910   return CGF.EmitLValue(Helper);
1911 }
1912 
1913 static std::pair<LValue, LValue>
1914 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1915                                      const OMPExecutableDirective &S) {
1916   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1917   LValue LB =
1918       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1919   LValue UB =
1920       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1921 
1922   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1923   // parallel for') we need to use the 'distribute'
1924   // chunk lower and upper bounds rather than the whole loop iteration
1925   // space. These are parameters to the outlined function for 'parallel'
1926   // and we copy the bounds of the previous schedule into the
1927   // the current ones.
1928   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1929   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1930   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1931   PrevLBVal = CGF.EmitScalarConversion(
1932       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1933       LS.getIterationVariable()->getType(), SourceLocation());
1934   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1935   PrevUBVal = CGF.EmitScalarConversion(
1936       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1937       LS.getIterationVariable()->getType(), SourceLocation());
1938 
1939   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1940   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1941 
1942   return {LB, UB};
1943 }
1944 
1945 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1946 /// we need to use the LB and UB expressions generated by the worksharing
1947 /// code generation support, whereas in non combined situations we would
1948 /// just emit 0 and the LastIteration expression
1949 /// This function is necessary due to the difference of the LB and UB
1950 /// types for the RT emission routines for 'for_static_init' and
1951 /// 'for_dispatch_init'
1952 static std::pair<llvm::Value *, llvm::Value *>
1953 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1954                                         const OMPExecutableDirective &S,
1955                                         Address LB, Address UB) {
1956   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1957   const Expr *IVExpr = LS.getIterationVariable();
1958   // when implementing a dynamic schedule for a 'for' combined with a
1959   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1960   // is not normalized as each team only executes its own assigned
1961   // distribute chunk
1962   QualType IteratorTy = IVExpr->getType();
1963   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1964                                             SourceLocation());
1965   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1966                                             SourceLocation());
1967   return {LBVal, UBVal};
1968 }
1969 
1970 static void emitDistributeParallelForDistributeInnerBoundParams(
1971     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1972     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1973   const auto &Dir = cast<OMPLoopDirective>(S);
1974   LValue LB =
1975       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1976   auto LBCast = CGF.Builder.CreateIntCast(
1977       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1978   CapturedVars.push_back(LBCast);
1979   LValue UB =
1980       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1981 
1982   auto UBCast = CGF.Builder.CreateIntCast(
1983       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1984   CapturedVars.push_back(UBCast);
1985 }
1986 
1987 static void
1988 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
1989                                  const OMPLoopDirective &S,
1990                                  CodeGenFunction::JumpDest LoopExit) {
1991   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
1992                                          PrePostActionTy &) {
1993     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
1994                                emitDistributeParallelForInnerBounds,
1995                                emitDistributeParallelForDispatchBounds);
1996   };
1997 
1998   emitCommonOMPParallelDirective(
1999       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
2000       emitDistributeParallelForDistributeInnerBoundParams);
2001 }
2002 
2003 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2004     const OMPDistributeParallelForDirective &S) {
2005   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2006     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2007                               S.getDistInc());
2008   };
2009   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2010   OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
2011                                   /*HasCancel=*/false);
2012   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2013                                               /*HasCancel=*/false);
2014 }
2015 
2016 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2017     const OMPDistributeParallelForSimdDirective &S) {
2018   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2019   CGM.getOpenMPRuntime().emitInlinedDirective(
2020       *this, OMPD_distribute_parallel_for_simd,
2021       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2022         OMPLoopScope PreInitScope(CGF, S);
2023         CGF.EmitStmt(
2024             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2025       });
2026 }
2027 
2028 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2029     const OMPDistributeSimdDirective &S) {
2030   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2031   CGM.getOpenMPRuntime().emitInlinedDirective(
2032       *this, OMPD_distribute_simd,
2033       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2034         OMPLoopScope PreInitScope(CGF, S);
2035         CGF.EmitStmt(
2036             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2037       });
2038 }
2039 
2040 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2041     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2042   // Emit SPMD target parallel for region as a standalone region.
2043   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2044     emitOMPSimdRegion(CGF, S, Action);
2045   };
2046   llvm::Function *Fn;
2047   llvm::Constant *Addr;
2048   // Emit target region as a standalone region.
2049   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2050       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2051   assert(Fn && Addr && "Target device function emission failed.");
2052 }
2053 
2054 void CodeGenFunction::EmitOMPTargetSimdDirective(
2055     const OMPTargetSimdDirective &S) {
2056   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2057     emitOMPSimdRegion(CGF, S, Action);
2058   };
2059   emitCommonOMPTargetDirective(*this, S, CodeGen);
2060 }
2061 
2062 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2063     const OMPTeamsDistributeSimdDirective &S) {
2064   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2065   CGM.getOpenMPRuntime().emitInlinedDirective(
2066       *this, OMPD_teams_distribute_simd,
2067       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2068         OMPLoopScope PreInitScope(CGF, S);
2069         CGF.EmitStmt(
2070             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2071       });
2072 }
2073 
2074 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2075     const OMPTeamsDistributeParallelForSimdDirective &S) {
2076   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2077   CGM.getOpenMPRuntime().emitInlinedDirective(
2078       *this, OMPD_teams_distribute_parallel_for_simd,
2079       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2080         OMPLoopScope PreInitScope(CGF, S);
2081         CGF.EmitStmt(
2082             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2083       });
2084 }
2085 
2086 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2087     const OMPTargetTeamsDistributeDirective &S) {
2088   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2089   CGM.getOpenMPRuntime().emitInlinedDirective(
2090       *this, OMPD_target_teams_distribute,
2091       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2092         CGF.EmitStmt(
2093             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2094       });
2095 }
2096 
2097 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2098     const OMPTargetTeamsDistributeParallelForDirective &S) {
2099   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2100   CGM.getOpenMPRuntime().emitInlinedDirective(
2101       *this, OMPD_target_teams_distribute_parallel_for,
2102       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2103         CGF.EmitStmt(
2104             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2105       });
2106 }
2107 
2108 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2109     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2110   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2111   CGM.getOpenMPRuntime().emitInlinedDirective(
2112       *this, OMPD_target_teams_distribute_parallel_for_simd,
2113       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2114         CGF.EmitStmt(
2115             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2116       });
2117 }
2118 
2119 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2120     const OMPTargetTeamsDistributeSimdDirective &S) {
2121   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2122   CGM.getOpenMPRuntime().emitInlinedDirective(
2123       *this, OMPD_target_teams_distribute_simd,
2124       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2125         CGF.EmitStmt(
2126             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2127       });
2128 }
2129 
2130 namespace {
2131   struct ScheduleKindModifiersTy {
2132     OpenMPScheduleClauseKind Kind;
2133     OpenMPScheduleClauseModifier M1;
2134     OpenMPScheduleClauseModifier M2;
2135     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2136                             OpenMPScheduleClauseModifier M1,
2137                             OpenMPScheduleClauseModifier M2)
2138         : Kind(Kind), M1(M1), M2(M2) {}
2139   };
2140 } // namespace
2141 
2142 bool CodeGenFunction::EmitOMPWorksharingLoop(
2143     const OMPLoopDirective &S, Expr *EUB,
2144     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2145     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2146   // Emit the loop iteration variable.
2147   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2148   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2149   EmitVarDecl(*IVDecl);
2150 
2151   // Emit the iterations count variable.
2152   // If it is not a variable, Sema decided to calculate iterations count on each
2153   // iteration (e.g., it is foldable into a constant).
2154   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2155     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2156     // Emit calculation of the iterations count.
2157     EmitIgnoredExpr(S.getCalcLastIteration());
2158   }
2159 
2160   auto &RT = CGM.getOpenMPRuntime();
2161 
2162   bool HasLastprivateClause;
2163   // Check pre-condition.
2164   {
2165     OMPLoopScope PreInitScope(*this, S);
2166     // Skip the entire loop if we don't meet the precondition.
2167     // If the condition constant folds and can be elided, avoid emitting the
2168     // whole loop.
2169     bool CondConstant;
2170     llvm::BasicBlock *ContBlock = nullptr;
2171     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2172       if (!CondConstant)
2173         return false;
2174     } else {
2175       auto *ThenBlock = createBasicBlock("omp.precond.then");
2176       ContBlock = createBasicBlock("omp.precond.end");
2177       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2178                   getProfileCount(&S));
2179       EmitBlock(ThenBlock);
2180       incrementProfileCounter(&S);
2181     }
2182 
2183     bool Ordered = false;
2184     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2185       if (OrderedClause->getNumForLoops())
2186         RT.emitDoacrossInit(*this, S);
2187       else
2188         Ordered = true;
2189     }
2190 
2191     llvm::DenseSet<const Expr *> EmittedFinals;
2192     emitAlignedClause(*this, S);
2193     bool HasLinears = EmitOMPLinearClauseInit(S);
2194     // Emit helper vars inits.
2195 
2196     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2197     LValue LB = Bounds.first;
2198     LValue UB = Bounds.second;
2199     LValue ST =
2200         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2201     LValue IL =
2202         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2203 
2204     // Emit 'then' code.
2205     {
2206       OMPPrivateScope LoopScope(*this);
2207       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2208         // Emit implicit barrier to synchronize threads and avoid data races on
2209         // initialization of firstprivate variables and post-update of
2210         // lastprivate variables.
2211         CGM.getOpenMPRuntime().emitBarrierCall(
2212             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2213             /*ForceSimpleCall=*/true);
2214       }
2215       EmitOMPPrivateClause(S, LoopScope);
2216       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2217       EmitOMPReductionClauseInit(S, LoopScope);
2218       EmitOMPPrivateLoopCounters(S, LoopScope);
2219       EmitOMPLinearClause(S, LoopScope);
2220       (void)LoopScope.Privatize();
2221 
2222       // Detect the loop schedule kind and chunk.
2223       llvm::Value *Chunk = nullptr;
2224       OpenMPScheduleTy ScheduleKind;
2225       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2226         ScheduleKind.Schedule = C->getScheduleKind();
2227         ScheduleKind.M1 = C->getFirstScheduleModifier();
2228         ScheduleKind.M2 = C->getSecondScheduleModifier();
2229         if (const auto *Ch = C->getChunkSize()) {
2230           Chunk = EmitScalarExpr(Ch);
2231           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2232                                        S.getIterationVariable()->getType(),
2233                                        S.getLocStart());
2234         }
2235       }
2236       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2237       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2238       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2239       // If the static schedule kind is specified or if the ordered clause is
2240       // specified, and if no monotonic modifier is specified, the effect will
2241       // be as if the monotonic modifier was specified.
2242       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2243                                 /* Chunked */ Chunk != nullptr) &&
2244           !Ordered) {
2245         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2246           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2247         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2248         // When no chunk_size is specified, the iteration space is divided into
2249         // chunks that are approximately equal in size, and at most one chunk is
2250         // distributed to each thread. Note that the size of the chunks is
2251         // unspecified in this case.
2252         CGOpenMPRuntime::StaticRTInput StaticInit(
2253             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2254             UB.getAddress(), ST.getAddress());
2255         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2256                              ScheduleKind, StaticInit);
2257         auto LoopExit =
2258             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2259         // UB = min(UB, GlobalUB);
2260         EmitIgnoredExpr(S.getEnsureUpperBound());
2261         // IV = LB;
2262         EmitIgnoredExpr(S.getInit());
2263         // while (idx <= UB) { BODY; ++idx; }
2264         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2265                          S.getInc(),
2266                          [&S, LoopExit](CodeGenFunction &CGF) {
2267                            CGF.EmitOMPLoopBody(S, LoopExit);
2268                            CGF.EmitStopPoint(&S);
2269                          },
2270                          [](CodeGenFunction &) {});
2271         EmitBlock(LoopExit.getBlock());
2272         // Tell the runtime we are done.
2273         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2274           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2275                                                          S.getDirectiveKind());
2276         };
2277         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2278       } else {
2279         const bool IsMonotonic =
2280             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2281             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2282             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2283             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2284         // Emit the outer loop, which requests its work chunk [LB..UB] from
2285         // runtime and runs the inner loop to process it.
2286         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2287                                              ST.getAddress(), IL.getAddress(),
2288                                              Chunk, EUB);
2289         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2290                             LoopArguments, CGDispatchBounds);
2291       }
2292       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2293         EmitOMPSimdFinal(S,
2294                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2295                            return CGF.Builder.CreateIsNotNull(
2296                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2297                          });
2298       }
2299       EmitOMPReductionClauseFinal(
2300           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2301                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2302                  : /*Parallel only*/ OMPD_parallel);
2303       // Emit post-update of the reduction variables if IsLastIter != 0.
2304       emitPostUpdateForReductionClause(
2305           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2306             return CGF.Builder.CreateIsNotNull(
2307                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2308           });
2309       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2310       if (HasLastprivateClause)
2311         EmitOMPLastprivateClauseFinal(
2312             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2313             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2314     }
2315     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2316       return CGF.Builder.CreateIsNotNull(
2317           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2318     });
2319     // We're now done with the loop, so jump to the continuation block.
2320     if (ContBlock) {
2321       EmitBranch(ContBlock);
2322       EmitBlock(ContBlock, true);
2323     }
2324   }
2325   return HasLastprivateClause;
2326 }
2327 
2328 /// The following two functions generate expressions for the loop lower
2329 /// and upper bounds in case of static and dynamic (dispatch) schedule
2330 /// of the associated 'for' or 'distribute' loop.
2331 static std::pair<LValue, LValue>
2332 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2333   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2334   LValue LB =
2335       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2336   LValue UB =
2337       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2338   return {LB, UB};
2339 }
2340 
2341 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2342 /// consider the lower and upper bound expressions generated by the
2343 /// worksharing loop support, but we use 0 and the iteration space size as
2344 /// constants
2345 static std::pair<llvm::Value *, llvm::Value *>
2346 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2347                           Address LB, Address UB) {
2348   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2349   const Expr *IVExpr = LS.getIterationVariable();
2350   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2351   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2352   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2353   return {LBVal, UBVal};
2354 }
2355 
2356 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2357   bool HasLastprivates = false;
2358   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2359                                           PrePostActionTy &) {
2360     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2361     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2362                                                  emitForLoopBounds,
2363                                                  emitDispatchForLoopBounds);
2364   };
2365   {
2366     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2367     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2368                                                 S.hasCancel());
2369   }
2370 
2371   // Emit an implicit barrier at the end.
2372   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2373     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2374   }
2375 }
2376 
2377 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2378   bool HasLastprivates = false;
2379   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2380                                           PrePostActionTy &) {
2381     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2382                                                  emitForLoopBounds,
2383                                                  emitDispatchForLoopBounds);
2384   };
2385   {
2386     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2387     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2388   }
2389 
2390   // Emit an implicit barrier at the end.
2391   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2392     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2393   }
2394 }
2395 
2396 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2397                                 const Twine &Name,
2398                                 llvm::Value *Init = nullptr) {
2399   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2400   if (Init)
2401     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2402   return LVal;
2403 }
2404 
2405 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2406   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
2407   auto *CS = dyn_cast<CompoundStmt>(Stmt);
2408   bool HasLastprivates = false;
2409   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
2410                                                     PrePostActionTy &) {
2411     auto &C = CGF.CGM.getContext();
2412     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2413     // Emit helper vars inits.
2414     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2415                                   CGF.Builder.getInt32(0));
2416     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2417                                       : CGF.Builder.getInt32(0);
2418     LValue UB =
2419         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2420     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2421                                   CGF.Builder.getInt32(1));
2422     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2423                                   CGF.Builder.getInt32(0));
2424     // Loop counter.
2425     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2426     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2427     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2428     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2429     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2430     // Generate condition for loop.
2431     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2432                         OK_Ordinary, S.getLocStart(), FPOptions());
2433     // Increment for loop counter.
2434     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2435                       S.getLocStart());
2436     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2437       // Iterate through all sections and emit a switch construct:
2438       // switch (IV) {
2439       //   case 0:
2440       //     <SectionStmt[0]>;
2441       //     break;
2442       // ...
2443       //   case <NumSection> - 1:
2444       //     <SectionStmt[<NumSection> - 1]>;
2445       //     break;
2446       // }
2447       // .omp.sections.exit:
2448       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2449       auto *SwitchStmt = CGF.Builder.CreateSwitch(
2450           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2451           CS == nullptr ? 1 : CS->size());
2452       if (CS) {
2453         unsigned CaseNumber = 0;
2454         for (auto *SubStmt : CS->children()) {
2455           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2456           CGF.EmitBlock(CaseBB);
2457           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2458           CGF.EmitStmt(SubStmt);
2459           CGF.EmitBranch(ExitBB);
2460           ++CaseNumber;
2461         }
2462       } else {
2463         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2464         CGF.EmitBlock(CaseBB);
2465         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2466         CGF.EmitStmt(Stmt);
2467         CGF.EmitBranch(ExitBB);
2468       }
2469       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2470     };
2471 
2472     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2473     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2474       // Emit implicit barrier to synchronize threads and avoid data races on
2475       // initialization of firstprivate variables and post-update of lastprivate
2476       // variables.
2477       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2478           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2479           /*ForceSimpleCall=*/true);
2480     }
2481     CGF.EmitOMPPrivateClause(S, LoopScope);
2482     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2483     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2484     (void)LoopScope.Privatize();
2485 
2486     // Emit static non-chunked loop.
2487     OpenMPScheduleTy ScheduleKind;
2488     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2489     CGOpenMPRuntime::StaticRTInput StaticInit(
2490         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
2491         LB.getAddress(), UB.getAddress(), ST.getAddress());
2492     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2493         CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
2494     // UB = min(UB, GlobalUB);
2495     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2496     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2497         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2498     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2499     // IV = LB;
2500     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2501     // while (idx <= UB) { BODY; ++idx; }
2502     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2503                          [](CodeGenFunction &) {});
2504     // Tell the runtime we are done.
2505     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2506       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2507                                                      S.getDirectiveKind());
2508     };
2509     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2510     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2511     // Emit post-update of the reduction variables if IsLastIter != 0.
2512     emitPostUpdateForReductionClause(
2513         CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2514           return CGF.Builder.CreateIsNotNull(
2515               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2516         });
2517 
2518     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2519     if (HasLastprivates)
2520       CGF.EmitOMPLastprivateClauseFinal(
2521           S, /*NoFinals=*/false,
2522           CGF.Builder.CreateIsNotNull(
2523               CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2524   };
2525 
2526   bool HasCancel = false;
2527   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2528     HasCancel = OSD->hasCancel();
2529   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2530     HasCancel = OPSD->hasCancel();
2531   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2532   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2533                                               HasCancel);
2534   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2535   // clause. Otherwise the barrier will be generated by the codegen for the
2536   // directive.
2537   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2538     // Emit implicit barrier to synchronize threads and avoid data races on
2539     // initialization of firstprivate variables.
2540     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2541                                            OMPD_unknown);
2542   }
2543 }
2544 
2545 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2546   {
2547     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2548     EmitSections(S);
2549   }
2550   // Emit an implicit barrier at the end.
2551   if (!S.getSingleClause<OMPNowaitClause>()) {
2552     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2553                                            OMPD_sections);
2554   }
2555 }
2556 
2557 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2558   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2559     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2560   };
2561   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2562   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2563                                               S.hasCancel());
2564 }
2565 
2566 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2567   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2568   llvm::SmallVector<const Expr *, 8> DestExprs;
2569   llvm::SmallVector<const Expr *, 8> SrcExprs;
2570   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2571   // Check if there are any 'copyprivate' clauses associated with this
2572   // 'single' construct.
2573   // Build a list of copyprivate variables along with helper expressions
2574   // (<source>, <destination>, <destination>=<source> expressions)
2575   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2576     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2577     DestExprs.append(C->destination_exprs().begin(),
2578                      C->destination_exprs().end());
2579     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2580     AssignmentOps.append(C->assignment_ops().begin(),
2581                          C->assignment_ops().end());
2582   }
2583   // Emit code for 'single' region along with 'copyprivate' clauses
2584   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2585     Action.Enter(CGF);
2586     OMPPrivateScope SingleScope(CGF);
2587     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2588     CGF.EmitOMPPrivateClause(S, SingleScope);
2589     (void)SingleScope.Privatize();
2590     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2591   };
2592   {
2593     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2594     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2595                                             CopyprivateVars, DestExprs,
2596                                             SrcExprs, AssignmentOps);
2597   }
2598   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2599   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2600   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2601     CGM.getOpenMPRuntime().emitBarrierCall(
2602         *this, S.getLocStart(),
2603         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2604   }
2605 }
2606 
2607 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2608   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2609     Action.Enter(CGF);
2610     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2611   };
2612   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2613   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2614 }
2615 
2616 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2617   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2618     Action.Enter(CGF);
2619     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2620   };
2621   Expr *Hint = nullptr;
2622   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2623     Hint = HintClause->getHint();
2624   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2625   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2626                                             S.getDirectiveName().getAsString(),
2627                                             CodeGen, S.getLocStart(), Hint);
2628 }
2629 
2630 void CodeGenFunction::EmitOMPParallelForDirective(
2631     const OMPParallelForDirective &S) {
2632   // Emit directive as a combined directive that consists of two implicit
2633   // directives: 'parallel' with 'for' directive.
2634   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2635     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2636     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2637                                emitDispatchForLoopBounds);
2638   };
2639   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2640                                  emitEmptyBoundParameters);
2641 }
2642 
2643 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2644     const OMPParallelForSimdDirective &S) {
2645   // Emit directive as a combined directive that consists of two implicit
2646   // directives: 'parallel' with 'for' directive.
2647   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2648     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2649                                emitDispatchForLoopBounds);
2650   };
2651   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2652                                  emitEmptyBoundParameters);
2653 }
2654 
2655 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2656     const OMPParallelSectionsDirective &S) {
2657   // Emit directive as a combined directive that consists of two implicit
2658   // directives: 'parallel' with 'sections' directive.
2659   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2660     CGF.EmitSections(S);
2661   };
2662   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2663                                  emitEmptyBoundParameters);
2664 }
2665 
2666 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2667                                                 const RegionCodeGenTy &BodyGen,
2668                                                 const TaskGenTy &TaskGen,
2669                                                 OMPTaskDataTy &Data) {
2670   // Emit outlined function for task construct.
2671   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2672   auto *I = CS->getCapturedDecl()->param_begin();
2673   auto *PartId = std::next(I);
2674   auto *TaskT = std::next(I, 4);
2675   // Check if the task is final
2676   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2677     // If the condition constant folds and can be elided, try to avoid emitting
2678     // the condition and the dead arm of the if/else.
2679     auto *Cond = Clause->getCondition();
2680     bool CondConstant;
2681     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2682       Data.Final.setInt(CondConstant);
2683     else
2684       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2685   } else {
2686     // By default the task is not final.
2687     Data.Final.setInt(/*IntVal=*/false);
2688   }
2689   // Check if the task has 'priority' clause.
2690   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2691     auto *Prio = Clause->getPriority();
2692     Data.Priority.setInt(/*IntVal=*/true);
2693     Data.Priority.setPointer(EmitScalarConversion(
2694         EmitScalarExpr(Prio), Prio->getType(),
2695         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2696         Prio->getExprLoc()));
2697   }
2698   // The first function argument for tasks is a thread id, the second one is a
2699   // part id (0 for tied tasks, >=0 for untied task).
2700   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2701   // Get list of private variables.
2702   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2703     auto IRef = C->varlist_begin();
2704     for (auto *IInit : C->private_copies()) {
2705       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2706       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2707         Data.PrivateVars.push_back(*IRef);
2708         Data.PrivateCopies.push_back(IInit);
2709       }
2710       ++IRef;
2711     }
2712   }
2713   EmittedAsPrivate.clear();
2714   // Get list of firstprivate variables.
2715   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2716     auto IRef = C->varlist_begin();
2717     auto IElemInitRef = C->inits().begin();
2718     for (auto *IInit : C->private_copies()) {
2719       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2720       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2721         Data.FirstprivateVars.push_back(*IRef);
2722         Data.FirstprivateCopies.push_back(IInit);
2723         Data.FirstprivateInits.push_back(*IElemInitRef);
2724       }
2725       ++IRef;
2726       ++IElemInitRef;
2727     }
2728   }
2729   // Get list of lastprivate variables (for taskloops).
2730   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2731   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2732     auto IRef = C->varlist_begin();
2733     auto ID = C->destination_exprs().begin();
2734     for (auto *IInit : C->private_copies()) {
2735       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2736       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2737         Data.LastprivateVars.push_back(*IRef);
2738         Data.LastprivateCopies.push_back(IInit);
2739       }
2740       LastprivateDstsOrigs.insert(
2741           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2742            cast<DeclRefExpr>(*IRef)});
2743       ++IRef;
2744       ++ID;
2745     }
2746   }
2747   SmallVector<const Expr *, 4> LHSs;
2748   SmallVector<const Expr *, 4> RHSs;
2749   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2750     auto IPriv = C->privates().begin();
2751     auto IRed = C->reduction_ops().begin();
2752     auto ILHS = C->lhs_exprs().begin();
2753     auto IRHS = C->rhs_exprs().begin();
2754     for (const auto *Ref : C->varlists()) {
2755       Data.ReductionVars.emplace_back(Ref);
2756       Data.ReductionCopies.emplace_back(*IPriv);
2757       Data.ReductionOps.emplace_back(*IRed);
2758       LHSs.emplace_back(*ILHS);
2759       RHSs.emplace_back(*IRHS);
2760       std::advance(IPriv, 1);
2761       std::advance(IRed, 1);
2762       std::advance(ILHS, 1);
2763       std::advance(IRHS, 1);
2764     }
2765   }
2766   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2767       *this, S.getLocStart(), LHSs, RHSs, Data);
2768   // Build list of dependences.
2769   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2770     for (auto *IRef : C->varlists())
2771       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2772   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2773       CodeGenFunction &CGF, PrePostActionTy &Action) {
2774     // Set proper addresses for generated private copies.
2775     OMPPrivateScope Scope(CGF);
2776     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2777         !Data.LastprivateVars.empty()) {
2778       enum { PrivatesParam = 2, CopyFnParam = 3 };
2779       auto *CopyFn = CGF.Builder.CreateLoad(
2780           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2781       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2782           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2783       // Map privates.
2784       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2785       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2786       CallArgs.push_back(PrivatesPtr);
2787       for (auto *E : Data.PrivateVars) {
2788         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2789         Address PrivatePtr = CGF.CreateMemTemp(
2790             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2791         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2792         CallArgs.push_back(PrivatePtr.getPointer());
2793       }
2794       for (auto *E : Data.FirstprivateVars) {
2795         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2796         Address PrivatePtr =
2797             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2798                               ".firstpriv.ptr.addr");
2799         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2800         CallArgs.push_back(PrivatePtr.getPointer());
2801       }
2802       for (auto *E : Data.LastprivateVars) {
2803         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2804         Address PrivatePtr =
2805             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2806                               ".lastpriv.ptr.addr");
2807         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2808         CallArgs.push_back(PrivatePtr.getPointer());
2809       }
2810       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2811                                                           CopyFn, CallArgs);
2812       for (auto &&Pair : LastprivateDstsOrigs) {
2813         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2814         DeclRefExpr DRE(
2815             const_cast<VarDecl *>(OrigVD),
2816             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2817                 OrigVD) != nullptr,
2818             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2819         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2820           return CGF.EmitLValue(&DRE).getAddress();
2821         });
2822       }
2823       for (auto &&Pair : PrivatePtrs) {
2824         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2825                             CGF.getContext().getDeclAlign(Pair.first));
2826         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2827       }
2828     }
2829     if (Data.Reductions) {
2830       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2831       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2832                              Data.ReductionOps);
2833       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2834           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2835       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2836         RedCG.emitSharedLValue(CGF, Cnt);
2837         RedCG.emitAggregateType(CGF, Cnt);
2838         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2839             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2840         Replacement =
2841             Address(CGF.EmitScalarConversion(
2842                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2843                         CGF.getContext().getPointerType(
2844                             Data.ReductionCopies[Cnt]->getType()),
2845                         SourceLocation()),
2846                     Replacement.getAlignment());
2847         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2848         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2849                          [Replacement]() { return Replacement; });
2850         // FIXME: This must be removed once the runtime library is fixed.
2851         // Emit required threadprivate variables for
2852         // initializer/combiner/finalizer.
2853         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2854                                                            RedCG, Cnt);
2855       }
2856     }
2857     // Privatize all private variables except for in_reduction items.
2858     (void)Scope.Privatize();
2859     SmallVector<const Expr *, 4> InRedVars;
2860     SmallVector<const Expr *, 4> InRedPrivs;
2861     SmallVector<const Expr *, 4> InRedOps;
2862     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2863     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2864       auto IPriv = C->privates().begin();
2865       auto IRed = C->reduction_ops().begin();
2866       auto ITD = C->taskgroup_descriptors().begin();
2867       for (const auto *Ref : C->varlists()) {
2868         InRedVars.emplace_back(Ref);
2869         InRedPrivs.emplace_back(*IPriv);
2870         InRedOps.emplace_back(*IRed);
2871         TaskgroupDescriptors.emplace_back(*ITD);
2872         std::advance(IPriv, 1);
2873         std::advance(IRed, 1);
2874         std::advance(ITD, 1);
2875       }
2876     }
2877     // Privatize in_reduction items here, because taskgroup descriptors must be
2878     // privatized earlier.
2879     OMPPrivateScope InRedScope(CGF);
2880     if (!InRedVars.empty()) {
2881       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2882       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2883         RedCG.emitSharedLValue(CGF, Cnt);
2884         RedCG.emitAggregateType(CGF, Cnt);
2885         // The taskgroup descriptor variable is always implicit firstprivate and
2886         // privatized already during processing of the firstprivates.
2887         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2888             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2889         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2890             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2891         Replacement = Address(
2892             CGF.EmitScalarConversion(
2893                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2894                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2895                 SourceLocation()),
2896             Replacement.getAlignment());
2897         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2898         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2899                               [Replacement]() { return Replacement; });
2900         // FIXME: This must be removed once the runtime library is fixed.
2901         // Emit required threadprivate variables for
2902         // initializer/combiner/finalizer.
2903         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2904                                                            RedCG, Cnt);
2905       }
2906     }
2907     (void)InRedScope.Privatize();
2908 
2909     Action.Enter(CGF);
2910     BodyGen(CGF);
2911   };
2912   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2913       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2914       Data.NumberOfParts);
2915   OMPLexicalScope Scope(*this, S);
2916   TaskGen(*this, OutlinedFn, Data);
2917 }
2918 
2919 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2920   // Emit outlined function for task construct.
2921   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2922   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2923   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2924   const Expr *IfCond = nullptr;
2925   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2926     if (C->getNameModifier() == OMPD_unknown ||
2927         C->getNameModifier() == OMPD_task) {
2928       IfCond = C->getCondition();
2929       break;
2930     }
2931   }
2932 
2933   OMPTaskDataTy Data;
2934   // Check if we should emit tied or untied task.
2935   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2936   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2937     CGF.EmitStmt(CS->getCapturedStmt());
2938   };
2939   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2940                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2941                             const OMPTaskDataTy &Data) {
2942     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2943                                             SharedsTy, CapturedStruct, IfCond,
2944                                             Data);
2945   };
2946   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2947 }
2948 
2949 void CodeGenFunction::EmitOMPTaskyieldDirective(
2950     const OMPTaskyieldDirective &S) {
2951   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2952 }
2953 
2954 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2955   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2956 }
2957 
2958 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2959   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2960 }
2961 
2962 void CodeGenFunction::EmitOMPTaskgroupDirective(
2963     const OMPTaskgroupDirective &S) {
2964   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2965     Action.Enter(CGF);
2966     if (const Expr *E = S.getReductionRef()) {
2967       SmallVector<const Expr *, 4> LHSs;
2968       SmallVector<const Expr *, 4> RHSs;
2969       OMPTaskDataTy Data;
2970       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2971         auto IPriv = C->privates().begin();
2972         auto IRed = C->reduction_ops().begin();
2973         auto ILHS = C->lhs_exprs().begin();
2974         auto IRHS = C->rhs_exprs().begin();
2975         for (const auto *Ref : C->varlists()) {
2976           Data.ReductionVars.emplace_back(Ref);
2977           Data.ReductionCopies.emplace_back(*IPriv);
2978           Data.ReductionOps.emplace_back(*IRed);
2979           LHSs.emplace_back(*ILHS);
2980           RHSs.emplace_back(*IRHS);
2981           std::advance(IPriv, 1);
2982           std::advance(IRed, 1);
2983           std::advance(ILHS, 1);
2984           std::advance(IRHS, 1);
2985         }
2986       }
2987       llvm::Value *ReductionDesc =
2988           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
2989                                                            LHSs, RHSs, Data);
2990       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2991       CGF.EmitVarDecl(*VD);
2992       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
2993                             /*Volatile=*/false, E->getType());
2994     }
2995     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2996   };
2997   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2998   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2999 }
3000 
3001 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3002   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3003     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3004       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3005                                 FlushClause->varlist_end());
3006     }
3007     return llvm::None;
3008   }(), S.getLocStart());
3009 }
3010 
3011 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3012                                             const CodeGenLoopTy &CodeGenLoop,
3013                                             Expr *IncExpr) {
3014   // Emit the loop iteration variable.
3015   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3016   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3017   EmitVarDecl(*IVDecl);
3018 
3019   // Emit the iterations count variable.
3020   // If it is not a variable, Sema decided to calculate iterations count on each
3021   // iteration (e.g., it is foldable into a constant).
3022   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3023     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3024     // Emit calculation of the iterations count.
3025     EmitIgnoredExpr(S.getCalcLastIteration());
3026   }
3027 
3028   auto &RT = CGM.getOpenMPRuntime();
3029 
3030   bool HasLastprivateClause = false;
3031   // Check pre-condition.
3032   {
3033     OMPLoopScope PreInitScope(*this, S);
3034     // Skip the entire loop if we don't meet the precondition.
3035     // If the condition constant folds and can be elided, avoid emitting the
3036     // whole loop.
3037     bool CondConstant;
3038     llvm::BasicBlock *ContBlock = nullptr;
3039     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3040       if (!CondConstant)
3041         return;
3042     } else {
3043       auto *ThenBlock = createBasicBlock("omp.precond.then");
3044       ContBlock = createBasicBlock("omp.precond.end");
3045       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3046                   getProfileCount(&S));
3047       EmitBlock(ThenBlock);
3048       incrementProfileCounter(&S);
3049     }
3050 
3051     // Emit 'then' code.
3052     {
3053       // Emit helper vars inits.
3054 
3055       LValue LB = EmitOMPHelperVar(
3056           *this, cast<DeclRefExpr>(
3057                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3058                           ? S.getCombinedLowerBoundVariable()
3059                           : S.getLowerBoundVariable())));
3060       LValue UB = EmitOMPHelperVar(
3061           *this, cast<DeclRefExpr>(
3062                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3063                           ? S.getCombinedUpperBoundVariable()
3064                           : S.getUpperBoundVariable())));
3065       LValue ST =
3066           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3067       LValue IL =
3068           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3069 
3070       OMPPrivateScope LoopScope(*this);
3071       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3072         // Emit implicit barrier to synchronize threads and avoid data races on
3073         // initialization of firstprivate variables and post-update of
3074         // lastprivate variables.
3075         CGM.getOpenMPRuntime().emitBarrierCall(
3076           *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3077           /*ForceSimpleCall=*/true);
3078       }
3079       EmitOMPPrivateClause(S, LoopScope);
3080       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3081       EmitOMPPrivateLoopCounters(S, LoopScope);
3082       (void)LoopScope.Privatize();
3083 
3084       // Detect the distribute schedule kind and chunk.
3085       llvm::Value *Chunk = nullptr;
3086       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3087       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3088         ScheduleKind = C->getDistScheduleKind();
3089         if (const auto *Ch = C->getChunkSize()) {
3090           Chunk = EmitScalarExpr(Ch);
3091           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3092           S.getIterationVariable()->getType(),
3093           S.getLocStart());
3094         }
3095       }
3096       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3097       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3098 
3099       // OpenMP [2.10.8, distribute Construct, Description]
3100       // If dist_schedule is specified, kind must be static. If specified,
3101       // iterations are divided into chunks of size chunk_size, chunks are
3102       // assigned to the teams of the league in a round-robin fashion in the
3103       // order of the team number. When no chunk_size is specified, the
3104       // iteration space is divided into chunks that are approximately equal
3105       // in size, and at most one chunk is distributed to each team of the
3106       // league. The size of the chunks is unspecified in this case.
3107       if (RT.isStaticNonchunked(ScheduleKind,
3108                                 /* Chunked */ Chunk != nullptr)) {
3109         CGOpenMPRuntime::StaticRTInput StaticInit(
3110             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3111             LB.getAddress(), UB.getAddress(), ST.getAddress());
3112         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3113                                     StaticInit);
3114         auto LoopExit =
3115             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3116         // UB = min(UB, GlobalUB);
3117         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3118                             ? S.getCombinedEnsureUpperBound()
3119                             : S.getEnsureUpperBound());
3120         // IV = LB;
3121         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3122                             ? S.getCombinedInit()
3123                             : S.getInit());
3124 
3125         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3126                          ? S.getCombinedCond()
3127                          : S.getCond();
3128 
3129         // for distribute alone,  codegen
3130         // while (idx <= UB) { BODY; ++idx; }
3131         // when combined with 'for' (e.g. as in 'distribute parallel for')
3132         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3133         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3134                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3135                            CodeGenLoop(CGF, S, LoopExit);
3136                          },
3137                          [](CodeGenFunction &) {});
3138         EmitBlock(LoopExit.getBlock());
3139         // Tell the runtime we are done.
3140         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3141       } else {
3142         // Emit the outer loop, which requests its work chunk [LB..UB] from
3143         // runtime and runs the inner loop to process it.
3144         const OMPLoopArguments LoopArguments = {
3145             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3146             Chunk};
3147         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3148                                    CodeGenLoop);
3149       }
3150 
3151       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3152       if (HasLastprivateClause)
3153         EmitOMPLastprivateClauseFinal(
3154             S, /*NoFinals=*/false,
3155             Builder.CreateIsNotNull(
3156                 EmitLoadOfScalar(IL, S.getLocStart())));
3157     }
3158 
3159     // We're now done with the loop, so jump to the continuation block.
3160     if (ContBlock) {
3161       EmitBranch(ContBlock);
3162       EmitBlock(ContBlock, true);
3163     }
3164   }
3165 }
3166 
3167 void CodeGenFunction::EmitOMPDistributeDirective(
3168     const OMPDistributeDirective &S) {
3169   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3170 
3171     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3172   };
3173   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3174   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3175                                               false);
3176 }
3177 
3178 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3179                                                    const CapturedStmt *S) {
3180   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3181   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3182   CGF.CapturedStmtInfo = &CapStmtInfo;
3183   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3184   Fn->addFnAttr(llvm::Attribute::NoInline);
3185   return Fn;
3186 }
3187 
3188 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3189   if (!S.getAssociatedStmt()) {
3190     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3191       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3192     return;
3193   }
3194   auto *C = S.getSingleClause<OMPSIMDClause>();
3195   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3196                                  PrePostActionTy &Action) {
3197     if (C) {
3198       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3199       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3200       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3201       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3202       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3203                                                       OutlinedFn, CapturedVars);
3204     } else {
3205       Action.Enter(CGF);
3206       CGF.EmitStmt(
3207           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3208     }
3209   };
3210   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3211   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3212 }
3213 
3214 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3215                                          QualType SrcType, QualType DestType,
3216                                          SourceLocation Loc) {
3217   assert(CGF.hasScalarEvaluationKind(DestType) &&
3218          "DestType must have scalar evaluation kind.");
3219   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3220   return Val.isScalar()
3221              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3222                                         Loc)
3223              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3224                                                  DestType, Loc);
3225 }
3226 
3227 static CodeGenFunction::ComplexPairTy
3228 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3229                       QualType DestType, SourceLocation Loc) {
3230   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3231          "DestType must have complex evaluation kind.");
3232   CodeGenFunction::ComplexPairTy ComplexVal;
3233   if (Val.isScalar()) {
3234     // Convert the input element to the element type of the complex.
3235     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3236     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3237                                               DestElementType, Loc);
3238     ComplexVal = CodeGenFunction::ComplexPairTy(
3239         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3240   } else {
3241     assert(Val.isComplex() && "Must be a scalar or complex.");
3242     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3243     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3244     ComplexVal.first = CGF.EmitScalarConversion(
3245         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3246     ComplexVal.second = CGF.EmitScalarConversion(
3247         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3248   }
3249   return ComplexVal;
3250 }
3251 
3252 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3253                                   LValue LVal, RValue RVal) {
3254   if (LVal.isGlobalReg()) {
3255     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3256   } else {
3257     CGF.EmitAtomicStore(RVal, LVal,
3258                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3259                                  : llvm::AtomicOrdering::Monotonic,
3260                         LVal.isVolatile(), /*IsInit=*/false);
3261   }
3262 }
3263 
3264 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3265                                          QualType RValTy, SourceLocation Loc) {
3266   switch (getEvaluationKind(LVal.getType())) {
3267   case TEK_Scalar:
3268     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3269                                *this, RVal, RValTy, LVal.getType(), Loc)),
3270                            LVal);
3271     break;
3272   case TEK_Complex:
3273     EmitStoreOfComplex(
3274         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3275         /*isInit=*/false);
3276     break;
3277   case TEK_Aggregate:
3278     llvm_unreachable("Must be a scalar or complex.");
3279   }
3280 }
3281 
3282 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3283                                   const Expr *X, const Expr *V,
3284                                   SourceLocation Loc) {
3285   // v = x;
3286   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3287   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3288   LValue XLValue = CGF.EmitLValue(X);
3289   LValue VLValue = CGF.EmitLValue(V);
3290   RValue Res = XLValue.isGlobalReg()
3291                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3292                    : CGF.EmitAtomicLoad(
3293                          XLValue, Loc,
3294                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3295                                   : llvm::AtomicOrdering::Monotonic,
3296                          XLValue.isVolatile());
3297   // OpenMP, 2.12.6, atomic Construct
3298   // Any atomic construct with a seq_cst clause forces the atomically
3299   // performed operation to include an implicit flush operation without a
3300   // list.
3301   if (IsSeqCst)
3302     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3303   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3304 }
3305 
3306 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3307                                    const Expr *X, const Expr *E,
3308                                    SourceLocation Loc) {
3309   // x = expr;
3310   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3311   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3312   // OpenMP, 2.12.6, atomic Construct
3313   // Any atomic construct with a seq_cst clause forces the atomically
3314   // performed operation to include an implicit flush operation without a
3315   // list.
3316   if (IsSeqCst)
3317     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3318 }
3319 
3320 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3321                                                 RValue Update,
3322                                                 BinaryOperatorKind BO,
3323                                                 llvm::AtomicOrdering AO,
3324                                                 bool IsXLHSInRHSPart) {
3325   auto &Context = CGF.CGM.getContext();
3326   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3327   // expression is simple and atomic is allowed for the given type for the
3328   // target platform.
3329   if (BO == BO_Comma || !Update.isScalar() ||
3330       !Update.getScalarVal()->getType()->isIntegerTy() ||
3331       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3332                         (Update.getScalarVal()->getType() !=
3333                          X.getAddress().getElementType())) ||
3334       !X.getAddress().getElementType()->isIntegerTy() ||
3335       !Context.getTargetInfo().hasBuiltinAtomic(
3336           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3337     return std::make_pair(false, RValue::get(nullptr));
3338 
3339   llvm::AtomicRMWInst::BinOp RMWOp;
3340   switch (BO) {
3341   case BO_Add:
3342     RMWOp = llvm::AtomicRMWInst::Add;
3343     break;
3344   case BO_Sub:
3345     if (!IsXLHSInRHSPart)
3346       return std::make_pair(false, RValue::get(nullptr));
3347     RMWOp = llvm::AtomicRMWInst::Sub;
3348     break;
3349   case BO_And:
3350     RMWOp = llvm::AtomicRMWInst::And;
3351     break;
3352   case BO_Or:
3353     RMWOp = llvm::AtomicRMWInst::Or;
3354     break;
3355   case BO_Xor:
3356     RMWOp = llvm::AtomicRMWInst::Xor;
3357     break;
3358   case BO_LT:
3359     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3360                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3361                                    : llvm::AtomicRMWInst::Max)
3362                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3363                                    : llvm::AtomicRMWInst::UMax);
3364     break;
3365   case BO_GT:
3366     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3367                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3368                                    : llvm::AtomicRMWInst::Min)
3369                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3370                                    : llvm::AtomicRMWInst::UMin);
3371     break;
3372   case BO_Assign:
3373     RMWOp = llvm::AtomicRMWInst::Xchg;
3374     break;
3375   case BO_Mul:
3376   case BO_Div:
3377   case BO_Rem:
3378   case BO_Shl:
3379   case BO_Shr:
3380   case BO_LAnd:
3381   case BO_LOr:
3382     return std::make_pair(false, RValue::get(nullptr));
3383   case BO_PtrMemD:
3384   case BO_PtrMemI:
3385   case BO_LE:
3386   case BO_GE:
3387   case BO_EQ:
3388   case BO_NE:
3389   case BO_AddAssign:
3390   case BO_SubAssign:
3391   case BO_AndAssign:
3392   case BO_OrAssign:
3393   case BO_XorAssign:
3394   case BO_MulAssign:
3395   case BO_DivAssign:
3396   case BO_RemAssign:
3397   case BO_ShlAssign:
3398   case BO_ShrAssign:
3399   case BO_Comma:
3400     llvm_unreachable("Unsupported atomic update operation");
3401   }
3402   auto *UpdateVal = Update.getScalarVal();
3403   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3404     UpdateVal = CGF.Builder.CreateIntCast(
3405         IC, X.getAddress().getElementType(),
3406         X.getType()->hasSignedIntegerRepresentation());
3407   }
3408   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3409   return std::make_pair(true, RValue::get(Res));
3410 }
3411 
3412 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3413     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3414     llvm::AtomicOrdering AO, SourceLocation Loc,
3415     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3416   // Update expressions are allowed to have the following forms:
3417   // x binop= expr; -> xrval + expr;
3418   // x++, ++x -> xrval + 1;
3419   // x--, --x -> xrval - 1;
3420   // x = x binop expr; -> xrval binop expr
3421   // x = expr Op x; - > expr binop xrval;
3422   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3423   if (!Res.first) {
3424     if (X.isGlobalReg()) {
3425       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3426       // 'xrval'.
3427       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3428     } else {
3429       // Perform compare-and-swap procedure.
3430       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3431     }
3432   }
3433   return Res;
3434 }
3435 
3436 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3437                                     const Expr *X, const Expr *E,
3438                                     const Expr *UE, bool IsXLHSInRHSPart,
3439                                     SourceLocation Loc) {
3440   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3441          "Update expr in 'atomic update' must be a binary operator.");
3442   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3443   // Update expressions are allowed to have the following forms:
3444   // x binop= expr; -> xrval + expr;
3445   // x++, ++x -> xrval + 1;
3446   // x--, --x -> xrval - 1;
3447   // x = x binop expr; -> xrval binop expr
3448   // x = expr Op x; - > expr binop xrval;
3449   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3450   LValue XLValue = CGF.EmitLValue(X);
3451   RValue ExprRValue = CGF.EmitAnyExpr(E);
3452   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3453                      : llvm::AtomicOrdering::Monotonic;
3454   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3455   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3456   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3457   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3458   auto Gen =
3459       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3460         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3461         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3462         return CGF.EmitAnyExpr(UE);
3463       };
3464   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3465       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3466   // OpenMP, 2.12.6, atomic Construct
3467   // Any atomic construct with a seq_cst clause forces the atomically
3468   // performed operation to include an implicit flush operation without a
3469   // list.
3470   if (IsSeqCst)
3471     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3472 }
3473 
3474 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3475                             QualType SourceType, QualType ResType,
3476                             SourceLocation Loc) {
3477   switch (CGF.getEvaluationKind(ResType)) {
3478   case TEK_Scalar:
3479     return RValue::get(
3480         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3481   case TEK_Complex: {
3482     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3483     return RValue::getComplex(Res.first, Res.second);
3484   }
3485   case TEK_Aggregate:
3486     break;
3487   }
3488   llvm_unreachable("Must be a scalar or complex.");
3489 }
3490 
/// Emit code for 'omp atomic capture': atomically update (or overwrite) 'x'
/// and store a captured value of 'x' to 'v'.
///
/// \param IsSeqCst true if the construct has a 'seq_cst' clause; selects
/// sequentially consistent ordering and adds a trailing flush.
/// \param IsPostfixUpdate when true, 'v' receives the value 'x' had before
/// the update; otherwise it receives the value computed for the update.
/// \param V lvalue expression that receives the captured value.
/// \param X lvalue expression that is updated atomically.
/// \param E the 'expr' operand of the update.
/// \param UE update expression (a binary operator over opaque values), or
/// null when 'x' is simply overwritten with 'expr'.
/// \param IsXLHSInRHSPart true if 'x' is the left operand of \a UE.
/// \param Loc location used for the emitted loads, stores and the flush.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value that is finally stored to 'v'; filled in either by the generator
  // lambdas below or after an 'atomicrmw' was emitted.
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // Type of NewVVal, passed on to the final store to 'v'.
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Generator used when no 'atomicrmw' can be emitted: evaluates the update
    // expression for a given value of 'x' and records the value to capture
    // (old value for postfix form, new value otherwise) as a side effect.
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    // Convert 'expr' to the (non-reference) type of 'x' before storing it.
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // Generator used when no 'atomicrmw xchg' can be emitted: records the
    // value of 'x' it is given as the captured value and yields 'expr' as the
    // value to store.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3571 
/// Dispatch emission of an 'omp atomic' construct to the emitter matching its
/// kind clause.
///
/// \param Kind the atomic kind clause: read, write, update or capture.
/// OMPC_unknown is handled exactly like OMPC_update. Any other clause kind is
/// treated as unreachable.
/// \param IsPostfixUpdate forwarded to the capture emitter only.
/// \param X, V, E, UE the 'x', 'v', 'expr' and update expressions of the
/// construct; each emitter receives only the ones it takes.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  // The switch has no default label: every clause kind is listed explicitly.
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  // No kind clause given: emitted the same way as an explicit 'update'.
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    // Note the argument order: the capture emitter takes 'v' before 'x'.
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  // All remaining clause kinds must never reach this function.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
3641 
3642 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3643   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3644   OpenMPClauseKind Kind = OMPC_unknown;
3645   for (auto *C : S.clauses()) {
3646     // Find first clause (skip seq_cst clause, if it is first).
3647     if (C->getClauseKind() != OMPC_seq_cst) {
3648       Kind = C->getClauseKind();
3649       break;
3650     }
3651   }
3652 
3653   const auto *CS =
3654       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3655   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3656     enterFullExpression(EWC);
3657   }
3658   // Processing for statements under 'atomic capture'.
3659   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3660     for (const auto *C : Compound->body()) {
3661       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3662         enterFullExpression(EWC);
3663       }
3664     }
3665   }
3666 
3667   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3668                                             PrePostActionTy &) {
3669     CGF.EmitStopPoint(CS);
3670     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3671                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3672                       S.isXLHSInRHSPart(), S.getLocStart());
3673   };
3674   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3675   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3676 }
3677 
3678 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3679                                          const OMPExecutableDirective &S,
3680                                          const RegionCodeGenTy &CodeGen) {
3681   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3682   CodeGenModule &CGM = CGF.CGM;
3683   const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target);
3684 
3685   llvm::Function *Fn = nullptr;
3686   llvm::Constant *FnID = nullptr;
3687 
3688   const Expr *IfCond = nullptr;
3689   // Check for the at most one if clause associated with the target region.
3690   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3691     if (C->getNameModifier() == OMPD_unknown ||
3692         C->getNameModifier() == OMPD_target) {
3693       IfCond = C->getCondition();
3694       break;
3695     }
3696   }
3697 
3698   // Check if we have any device clause associated with the directive.
3699   const Expr *Device = nullptr;
3700   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3701     Device = C->getDevice();
3702   }
3703 
3704   // Check if we have an if clause whose conditional always evaluates to false
3705   // or if we do not have any targets specified. If so the target region is not
3706   // an offload entry point.
3707   bool IsOffloadEntry = true;
3708   if (IfCond) {
3709     bool Val;
3710     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3711       IsOffloadEntry = false;
3712   }
3713   if (CGM.getLangOpts().OMPTargetTriples.empty())
3714     IsOffloadEntry = false;
3715 
3716   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3717   StringRef ParentName;
3718   // In case we have Ctors/Dtors we use the complete type variant to produce
3719   // the mangling of the device outlined kernel.
3720   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3721     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3722   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3723     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3724   else
3725     ParentName =
3726         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3727 
3728   // Emit target region as a standalone region.
3729   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3730                                                     IsOffloadEntry, CodeGen);
3731   OMPLexicalScope Scope(CGF, S);
3732   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3733   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3734   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3735                                         CapturedVars);
3736 }
3737 
3738 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3739                              PrePostActionTy &Action) {
3740   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3741   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3742   CGF.EmitOMPPrivateClause(S, PrivateScope);
3743   (void)PrivateScope.Privatize();
3744 
3745   Action.Enter(CGF);
3746   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3747 }
3748 
3749 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3750                                                   StringRef ParentName,
3751                                                   const OMPTargetDirective &S) {
3752   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3753     emitTargetRegion(CGF, S, Action);
3754   };
3755   llvm::Function *Fn;
3756   llvm::Constant *Addr;
3757   // Emit target region as a standalone region.
3758   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3759       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3760   assert(Fn && Addr && "Target device function emission failed.");
3761 }
3762 
3763 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3764   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3765     emitTargetRegion(CGF, S, Action);
3766   };
3767   emitCommonOMPTargetDirective(*this, S, CodeGen);
3768 }
3769 
3770 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3771                                         const OMPExecutableDirective &S,
3772                                         OpenMPDirectiveKind InnermostKind,
3773                                         const RegionCodeGenTy &CodeGen) {
3774   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3775   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3776       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3777 
3778   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3779   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3780   if (NT || TL) {
3781     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3782     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3783 
3784     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3785                                                   S.getLocStart());
3786   }
3787 
3788   OMPTeamsScope Scope(CGF, S);
3789   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3790   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3791   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3792                                            CapturedVars);
3793 }
3794 
3795 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3796   // Emit teams region as a standalone region.
3797   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3798     OMPPrivateScope PrivateScope(CGF);
3799     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3800     CGF.EmitOMPPrivateClause(S, PrivateScope);
3801     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3802     (void)PrivateScope.Privatize();
3803     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3804     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3805   };
3806   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
3807   emitPostUpdateForReductionClause(
3808       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3809 }
3810 
3811 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3812                                   const OMPTargetTeamsDirective &S) {
3813   auto *CS = S.getCapturedStmt(OMPD_teams);
3814   Action.Enter(CGF);
3815   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3816     // TODO: Add support for clauses.
3817     CGF.EmitStmt(CS->getCapturedStmt());
3818   };
3819   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3820 }
3821 
3822 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3823     CodeGenModule &CGM, StringRef ParentName,
3824     const OMPTargetTeamsDirective &S) {
3825   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3826     emitTargetTeamsRegion(CGF, Action, S);
3827   };
3828   llvm::Function *Fn;
3829   llvm::Constant *Addr;
3830   // Emit target region as a standalone region.
3831   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3832       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3833   assert(Fn && Addr && "Target device function emission failed.");
3834 }
3835 
3836 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3837     const OMPTargetTeamsDirective &S) {
3838   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3839     emitTargetTeamsRegion(CGF, Action, S);
3840   };
3841   emitCommonOMPTargetDirective(*this, S, CodeGen);
3842 }
3843 
3844 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
3845     const OMPTeamsDistributeDirective &S) {
3846 
3847   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3848     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3849   };
3850 
3851   // Emit teams region as a standalone region.
3852   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3853                                             PrePostActionTy &) {
3854     OMPPrivateScope PrivateScope(CGF);
3855     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3856     (void)PrivateScope.Privatize();
3857     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3858                                                     CodeGenDistribute);
3859     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3860   };
3861   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3862   emitPostUpdateForReductionClause(*this, S,
3863                                    [](CodeGenFunction &) { return nullptr; });
3864 }
3865 
3866 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
3867     const OMPTeamsDistributeParallelForDirective &S) {
3868   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3869     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3870                               S.getDistInc());
3871   };
3872 
3873   // Emit teams region as a standalone region.
3874   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3875                                             PrePostActionTy &) {
3876     OMPPrivateScope PrivateScope(CGF);
3877     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3878     (void)PrivateScope.Privatize();
3879     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3880                                                     CodeGenDistribute);
3881     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3882   };
3883   emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
3884   emitPostUpdateForReductionClause(*this, S,
3885                                    [](CodeGenFunction &) { return nullptr; });
3886 }
3887 
3888 void CodeGenFunction::EmitOMPCancellationPointDirective(
3889     const OMPCancellationPointDirective &S) {
3890   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3891                                                    S.getCancelRegion());
3892 }
3893 
3894 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3895   const Expr *IfCond = nullptr;
3896   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3897     if (C->getNameModifier() == OMPD_unknown ||
3898         C->getNameModifier() == OMPD_cancel) {
3899       IfCond = C->getCondition();
3900       break;
3901     }
3902   }
3903   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3904                                         S.getCancelRegion());
3905 }
3906 
3907 CodeGenFunction::JumpDest
3908 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3909   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3910       Kind == OMPD_target_parallel)
3911     return ReturnBlock;
3912   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3913          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3914          Kind == OMPD_distribute_parallel_for ||
3915          Kind == OMPD_target_parallel_for);
3916   return OMPCancelStack.getExitBlock();
3917 }
3918 
3919 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3920     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3921     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3922   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3923   auto OrigVarIt = C.varlist_begin();
3924   auto InitIt = C.inits().begin();
3925   for (auto PvtVarIt : C.private_copies()) {
3926     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3927     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3928     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3929 
3930     // In order to identify the right initializer we need to match the
3931     // declaration used by the mapping logic. In some cases we may get
3932     // OMPCapturedExprDecl that refers to the original declaration.
3933     const ValueDecl *MatchingVD = OrigVD;
3934     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3935       // OMPCapturedExprDecl are used to privative fields of the current
3936       // structure.
3937       auto *ME = cast<MemberExpr>(OED->getInit());
3938       assert(isa<CXXThisExpr>(ME->getBase()) &&
3939              "Base should be the current struct!");
3940       MatchingVD = ME->getMemberDecl();
3941     }
3942 
3943     // If we don't have information about the current list item, move on to
3944     // the next one.
3945     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3946     if (InitAddrIt == CaptureDeviceAddrMap.end())
3947       continue;
3948 
3949     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3950       // Initialize the temporary initialization variable with the address we
3951       // get from the runtime library. We have to cast the source address
3952       // because it is always a void *. References are materialized in the
3953       // privatization scope, so the initialization here disregards the fact
3954       // the original variable is a reference.
3955       QualType AddrQTy =
3956           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3957       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3958       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3959       setAddrOfLocalVar(InitVD, InitAddr);
3960 
3961       // Emit private declaration, it will be initialized by the value we
3962       // declaration we just added to the local declarations map.
3963       EmitDecl(*PvtVD);
3964 
3965       // The initialization variables reached its purpose in the emission
3966       // ofthe previous declaration, so we don't need it anymore.
3967       LocalDeclMap.erase(InitVD);
3968 
3969       // Return the address of the private variable.
3970       return GetAddrOfLocalVar(PvtVD);
3971     });
3972     assert(IsRegistered && "firstprivate var already registered as private");
3973     // Silence the warning about unused variable.
3974     (void)IsRegistered;
3975 
3976     ++OrigVarIt;
3977     ++InitIt;
3978   }
3979 }
3980 
// Generate the instructions for '#pragma omp target data' directive.
//
// The region body is emitted as an inlined directive. Whether the
// use_device_ptr list items are privatized around the body is decided while
// the region is emitted: it happens only if the action attached to the region
// sets the PrivatizeDevicePointers flag from its Enter() hook.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    // Flag owned by the enclosing function; set when the action is entered.
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emits the statement captured by the directive.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      // Reset the flag; only the Enter() call below may turn it on again.
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        // The private scope must stay alive while the region body is emitted.
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    // No action is attached on this path, only the region itself is emitted.
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
4070 
4071 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4072     const OMPTargetEnterDataDirective &S) {
4073   // If we don't have target devices, don't bother emitting the data mapping
4074   // code.
4075   if (CGM.getLangOpts().OMPTargetTriples.empty())
4076     return;
4077 
4078   // Check if we have any if clause associated with the directive.
4079   const Expr *IfCond = nullptr;
4080   if (auto *C = S.getSingleClause<OMPIfClause>())
4081     IfCond = C->getCondition();
4082 
4083   // Check if we have any device clause associated with the directive.
4084   const Expr *Device = nullptr;
4085   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4086     Device = C->getDevice();
4087 
4088   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4089                                         PrePostActionTy &) {
4090     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4091                                                             Device);
4092   };
4093   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4094   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data,
4095                                               CodeGen);
4096 }
4097 
4098 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4099     const OMPTargetExitDataDirective &S) {
4100   // If we don't have target devices, don't bother emitting the data mapping
4101   // code.
4102   if (CGM.getLangOpts().OMPTargetTriples.empty())
4103     return;
4104 
4105   // Check if we have any if clause associated with the directive.
4106   const Expr *IfCond = nullptr;
4107   if (auto *C = S.getSingleClause<OMPIfClause>())
4108     IfCond = C->getCondition();
4109 
4110   // Check if we have any device clause associated with the directive.
4111   const Expr *Device = nullptr;
4112   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4113     Device = C->getDevice();
4114 
4115   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4116                                         PrePostActionTy &) {
4117     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4118                                                             Device);
4119   };
4120   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4121   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data,
4122                                               CodeGen);
4123 }
4124 
4125 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4126                                      const OMPTargetParallelDirective &S,
4127                                      PrePostActionTy &Action) {
4128   // Get the captured statement associated with the 'parallel' region.
4129   auto *CS = S.getCapturedStmt(OMPD_parallel);
4130   Action.Enter(CGF);
4131   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4132     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4133     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4134     CGF.EmitOMPPrivateClause(S, PrivateScope);
4135     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4136     (void)PrivateScope.Privatize();
4137     // TODO: Add support for clauses.
4138     CGF.EmitStmt(CS->getCapturedStmt());
4139     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4140   };
4141   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4142                                  emitEmptyBoundParameters);
4143   emitPostUpdateForReductionClause(
4144       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4145 }
4146 
4147 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4148     CodeGenModule &CGM, StringRef ParentName,
4149     const OMPTargetParallelDirective &S) {
4150   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4151     emitTargetParallelRegion(CGF, S, Action);
4152   };
4153   llvm::Function *Fn;
4154   llvm::Constant *Addr;
4155   // Emit target region as a standalone region.
4156   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4157       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4158   assert(Fn && Addr && "Target device function emission failed.");
4159 }
4160 
4161 void CodeGenFunction::EmitOMPTargetParallelDirective(
4162     const OMPTargetParallelDirective &S) {
4163   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4164     emitTargetParallelRegion(CGF, S, Action);
4165   };
4166   emitCommonOMPTargetDirective(*this, S, CodeGen);
4167 }
4168 
4169 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4170                                         const OMPTargetParallelForDirective &S,
4171                                         PrePostActionTy &Action) {
4172   Action.Enter(CGF);
4173   // Emit directive as a combined directive that consists of two implicit
4174   // directives: 'parallel' with 'for' directive.
4175   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4176     CodeGenFunction::OMPCancelStackRAII CancelRegion(
4177         CGF, OMPD_target_parallel_for, S.hasCancel());
4178     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4179                                emitDispatchForLoopBounds);
4180   };
4181   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4182                                  emitEmptyBoundParameters);
4183 }
4184 
4185 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4186     CodeGenModule &CGM, StringRef ParentName,
4187     const OMPTargetParallelForDirective &S) {
4188   // Emit SPMD target parallel for region as a standalone region.
4189   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4190     emitTargetParallelForRegion(CGF, S, Action);
4191   };
4192   llvm::Function *Fn;
4193   llvm::Constant *Addr;
4194   // Emit target region as a standalone region.
4195   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4196       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4197   assert(Fn && Addr && "Target device function emission failed.");
4198 }
4199 
4200 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4201     const OMPTargetParallelForDirective &S) {
4202   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4203     emitTargetParallelForRegion(CGF, S, Action);
4204   };
4205   emitCommonOMPTargetDirective(*this, S, CodeGen);
4206 }
4207 
4208 static void
4209 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4210                                 const OMPTargetParallelForSimdDirective &S,
4211                                 PrePostActionTy &Action) {
4212   Action.Enter(CGF);
4213   // Emit directive as a combined directive that consists of two implicit
4214   // directives: 'parallel' with 'for' directive.
4215   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4216     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4217                                emitDispatchForLoopBounds);
4218   };
4219   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4220                                  emitEmptyBoundParameters);
4221 }
4222 
4223 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4224     CodeGenModule &CGM, StringRef ParentName,
4225     const OMPTargetParallelForSimdDirective &S) {
4226   // Emit SPMD target parallel for region as a standalone region.
4227   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4228     emitTargetParallelForSimdRegion(CGF, S, Action);
4229   };
4230   llvm::Function *Fn;
4231   llvm::Constant *Addr;
4232   // Emit target region as a standalone region.
4233   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4234       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4235   assert(Fn && Addr && "Target device function emission failed.");
4236 }
4237 
4238 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4239     const OMPTargetParallelForSimdDirective &S) {
4240   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4241     emitTargetParallelForSimdRegion(CGF, S, Action);
4242   };
4243   emitCommonOMPTargetDirective(*this, S, CodeGen);
4244 }
4245 
4246 /// Emit a helper variable and return corresponding lvalue.
4247 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4248                      const ImplicitParamDecl *PVD,
4249                      CodeGenFunction::OMPPrivateScope &Privates) {
4250   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4251   Privates.addPrivate(
4252       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4253 }
4254 
4255 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4256   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4257   // Emit outlined function for task construct.
4258   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
4259   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4260   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4261   const Expr *IfCond = nullptr;
4262   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4263     if (C->getNameModifier() == OMPD_unknown ||
4264         C->getNameModifier() == OMPD_taskloop) {
4265       IfCond = C->getCondition();
4266       break;
4267     }
4268   }
4269 
4270   OMPTaskDataTy Data;
4271   // Check if taskloop must be emitted without taskgroup.
4272   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4273   // TODO: Check if we should emit tied or untied task.
4274   Data.Tied = true;
4275   // Set scheduling for taskloop
4276   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4277     // grainsize clause
4278     Data.Schedule.setInt(/*IntVal=*/false);
4279     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4280   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4281     // num_tasks clause
4282     Data.Schedule.setInt(/*IntVal=*/true);
4283     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4284   }
4285 
4286   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4287     // if (PreCond) {
4288     //   for (IV in 0..LastIteration) BODY;
4289     //   <Final counter/linear vars updates>;
4290     // }
4291     //
4292 
4293     // Emit: if (PreCond) - begin.
4294     // If the condition constant folds and can be elided, avoid emitting the
4295     // whole loop.
4296     bool CondConstant;
4297     llvm::BasicBlock *ContBlock = nullptr;
4298     OMPLoopScope PreInitScope(CGF, S);
4299     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4300       if (!CondConstant)
4301         return;
4302     } else {
4303       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4304       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4305       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4306                   CGF.getProfileCount(&S));
4307       CGF.EmitBlock(ThenBlock);
4308       CGF.incrementProfileCounter(&S);
4309     }
4310 
4311     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4312       CGF.EmitOMPSimdInit(S);
4313 
4314     OMPPrivateScope LoopScope(CGF);
4315     // Emit helper vars inits.
4316     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4317     auto *I = CS->getCapturedDecl()->param_begin();
4318     auto *LBP = std::next(I, LowerBound);
4319     auto *UBP = std::next(I, UpperBound);
4320     auto *STP = std::next(I, Stride);
4321     auto *LIP = std::next(I, LastIter);
4322     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4323              LoopScope);
4324     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4325              LoopScope);
4326     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4327     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4328              LoopScope);
4329     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4330     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4331     (void)LoopScope.Privatize();
4332     // Emit the loop iteration variable.
4333     const Expr *IVExpr = S.getIterationVariable();
4334     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4335     CGF.EmitVarDecl(*IVDecl);
4336     CGF.EmitIgnoredExpr(S.getInit());
4337 
4338     // Emit the iterations count variable.
4339     // If it is not a variable, Sema decided to calculate iterations count on
4340     // each iteration (e.g., it is foldable into a constant).
4341     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4342       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4343       // Emit calculation of the iterations count.
4344       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4345     }
4346 
4347     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4348                          S.getInc(),
4349                          [&S](CodeGenFunction &CGF) {
4350                            CGF.EmitOMPLoopBody(S, JumpDest());
4351                            CGF.EmitStopPoint(&S);
4352                          },
4353                          [](CodeGenFunction &) {});
4354     // Emit: if (PreCond) - end.
4355     if (ContBlock) {
4356       CGF.EmitBranch(ContBlock);
4357       CGF.EmitBlock(ContBlock, true);
4358     }
4359     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4360     if (HasLastprivateClause) {
4361       CGF.EmitOMPLastprivateClauseFinal(
4362           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4363           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4364               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4365               (*LIP)->getType(), S.getLocStart())));
4366     }
4367   };
4368   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4369                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4370                             const OMPTaskDataTy &Data) {
4371     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4372       OMPLoopScope PreInitScope(CGF, S);
4373       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4374                                                   OutlinedFn, SharedsTy,
4375                                                   CapturedStruct, IfCond, Data);
4376     };
4377     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4378                                                     CodeGen);
4379   };
4380   if (Data.Nogroup)
4381     EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4382   else {
4383     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4384         *this,
4385         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4386                                         PrePostActionTy &Action) {
4387           Action.Enter(CGF);
4388           CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4389         },
4390         S.getLocStart());
4391   }
4392 }
4393 
4394 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4395   EmitOMPTaskLoopBasedDirective(S);
4396 }
4397 
4398 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4399     const OMPTaskLoopSimdDirective &S) {
4400   EmitOMPTaskLoopBasedDirective(S);
4401 }
4402 
4403 // Generate the instructions for '#pragma omp target update' directive.
4404 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4405     const OMPTargetUpdateDirective &S) {
4406   // If we don't have target devices, don't bother emitting the data mapping
4407   // code.
4408   if (CGM.getLangOpts().OMPTargetTriples.empty())
4409     return;
4410 
4411   // Check if we have any if clause associated with the directive.
4412   const Expr *IfCond = nullptr;
4413   if (auto *C = S.getSingleClause<OMPIfClause>())
4414     IfCond = C->getCondition();
4415 
4416   // Check if we have any device clause associated with the directive.
4417   const Expr *Device = nullptr;
4418   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4419     Device = C->getDevice();
4420 
4421   auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
4422                                         PrePostActionTy &) {
4423     CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
4424                                                             Device);
4425   };
4426   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4427   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update,
4428                                               CodeGen);
4429 }
4430