1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emit the declarations from the pre-init statement lists attached to the
  /// directive's clauses so that captured expressions are materialized before
  /// the construct's body is emitted.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // Marked as not requiring initialization: allocate storage and
              // register cleanups, but skip emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  // Privatization scope used to remap captured variables to the addresses of
  // their captures when the construct's body is emitted inline.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD must be accessed through the enclosing lambda's,
  /// captured statement's, or block's capture machinery.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  /// \param AsInlined If true, remap the variables captured by the associated
  ///        captured statement so the body can be emitted inline.
  /// \param EmitPreInitStmt If true, emit the clauses' pre-init declarations.
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            assert(VD == VD->getCanonicalDecl() &&
                        "Canonical decl must be captured.");
            // Build a reference to the captured variable; flag it as
            // referring to an enclosing capture when needed so EmitLValue
            // resolves it through the capture fields.
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S,
100                         /*AsInlined=*/false,
101                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S,
116                         /*AsInlined=*/false,
117                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 };
119 
120 /// Private scope for OpenMP loop-based directives, that supports capturing
121 /// of used expression from loop statement.
122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
123   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
124     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
125       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
126         for (const auto *I : PreInits->decls())
127           CGF.EmitVarDecl(cast<VarDecl>(*I));
128       }
129     }
130   }
131 
132 public:
133   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
134       : CodeGenFunction::RunCleanupsScope(CGF) {
135     emitPreInitStmt(CGF, S);
136   }
137 };
138 
139 } // namespace
140 
141 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
142   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
143     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
144       OrigVD = OrigVD->getCanonicalDecl();
145       bool IsCaptured =
146           LambdaCaptureFields.lookup(OrigVD) ||
147           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
148           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
149       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
150                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
151       return EmitLValue(&DRE);
152     }
153   }
154   return EmitLValue(E);
155 }
156 
157 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
158   auto &C = getContext();
159   llvm::Value *Size = nullptr;
160   auto SizeInChars = C.getTypeSizeInChars(Ty);
161   if (SizeInChars.isZero()) {
162     // getTypeSizeInChars() returns 0 for a VLA.
163     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
164       llvm::Value *ArraySize;
165       std::tie(ArraySize, Ty) = getVLASize(VAT);
166       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
167     }
168     SizeInChars = C.getTypeSizeInChars(Ty);
169     if (SizeInChars.isZero())
170       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
171     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
172   } else
173     Size = CGM.getSize(SizeInChars);
174   return Size;
175 }
176 
/// Collects in \p CapturedVars the values to pass to the outlined function
/// for each capture of \p S: VLA sizes, 'this', by-copy values (cast to
/// uintptr when not already pointer-sized), and addresses for by-reference
/// captures.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  // The capture-init expressions, record fields, and captures are parallel
  // sequences; advance all three iterators in lockstep.
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously computed VLA dimension size.
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // View the uintptr-typed temporary through a pointer to the source
        // type so the store below writes the original representation.
        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}
223 
224 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
225                                     StringRef Name, LValue AddrLV,
226                                     bool isReferenceType = false) {
227   ASTContext &Ctx = CGF.getContext();
228 
229   auto *CastedPtr = CGF.EmitScalarConversion(
230       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
231       Ctx.getPointerType(DstType), SourceLocation());
232   auto TmpAddr =
233       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
234           .getAddress();
235 
236   // If we are dealing with references we need to return the address of the
237   // reference instead of the reference of the value.
238   if (isReferenceType) {
239     QualType RefType = Ctx.getLValueReferenceType(DstType);
240     auto *RefVal = TmpAddr.getPointer();
241     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
242     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
243     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
244   }
245 
246   return TmpAddr;
247 }
248 
249 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
250   if (T->isLValueReferenceType()) {
251     return C.getLValueReferenceType(
252         getCanonicalParamType(C, T.getNonReferenceType()),
253         /*SpelledAsLValue=*/false);
254   }
255   if (T->isPointerType())
256     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
257   return C.getCanonicalParamType(T);
258 }
259 
namespace {
  /// Contains required data for proper outlined function codegen.
  struct FunctionOptions {
    /// Captured statement for which the function is generated.
    const CapturedStmt *S = nullptr;
    /// true if cast to/from  UIntPtr is required for variables captured by
    /// value.
    const bool UIntPtrCastRequired = true;
    /// true if only casted arguments must be registered as local args or VLA
    /// sizes.
    const bool RegisterCastedArgsOnly = false;
    /// Name of the generated function.
    const StringRef FunctionName;
    /// \note RegisterCastedArgsOnly is anded with UIntPtrCastRequired: when
    /// no uintptr casting is performed there are no casted-only arguments to
    /// restrict registration to.
    explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                             bool RegisterCastedArgsOnly,
                             StringRef FunctionName)
        : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
          RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
          FunctionName(FunctionName) {}
  };
}
281 
282 static llvm::Function *emitOutlinedFunctionPrologue(
283     CodeGenFunction &CGF, FunctionArgList &Args,
284     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
285         &LocalAddrs,
286     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
287         &VLASizes,
288     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
289   const CapturedDecl *CD = FO.S->getCapturedDecl();
290   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
291   assert(CD->hasBody() && "missing CapturedDecl body");
292 
293   CXXThisValue = nullptr;
294   // Build the argument list.
295   CodeGenModule &CGM = CGF.CGM;
296   ASTContext &Ctx = CGM.getContext();
297   FunctionArgList TargetArgs;
298   Args.append(CD->param_begin(),
299               std::next(CD->param_begin(), CD->getContextParamPosition()));
300   TargetArgs.append(
301       CD->param_begin(),
302       std::next(CD->param_begin(), CD->getContextParamPosition()));
303   auto I = FO.S->captures().begin();
304   for (auto *FD : RD->fields()) {
305     QualType ArgType = FD->getType();
306     IdentifierInfo *II = nullptr;
307     VarDecl *CapVar = nullptr;
308 
309     // If this is a capture by copy and the type is not a pointer, the outlined
310     // function argument type should be uintptr and the value properly casted to
311     // uintptr. This is necessary given that the runtime library is only able to
312     // deal with pointers. We can pass in the same way the VLA type sizes to the
313     // outlined function.
314     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
315         I->capturesVariableArrayType()) {
316       if (FO.UIntPtrCastRequired)
317         ArgType = Ctx.getUIntPtrType();
318     }
319 
320     if (I->capturesVariable() || I->capturesVariableByCopy()) {
321       CapVar = I->getCapturedVar();
322       II = CapVar->getIdentifier();
323     } else if (I->capturesThis())
324       II = &Ctx.Idents.get("this");
325     else {
326       assert(I->capturesVariableArrayType());
327       II = &Ctx.Idents.get("vla");
328     }
329     if (ArgType->isVariablyModifiedType())
330       ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
331     auto *Arg =
332         ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
333                                   ArgType, ImplicitParamDecl::Other);
334     Args.emplace_back(Arg);
335     // Do not cast arguments if we emit function with non-original types.
336     TargetArgs.emplace_back(
337         FO.UIntPtrCastRequired
338             ? Arg
339             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
340     ++I;
341   }
342   Args.append(
343       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
344       CD->param_end());
345   TargetArgs.append(
346       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
347       CD->param_end());
348 
349   // Create the function declaration.
350   FunctionType::ExtInfo ExtInfo;
351   const CGFunctionInfo &FuncInfo =
352       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
353   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
354 
355   llvm::Function *F =
356       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
357                              FO.FunctionName, &CGM.getModule());
358   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
359   if (CD->isNothrow())
360     F->setDoesNotThrow();
361 
362   // Generate the function.
363   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
364                     FO.S->getLocStart(), CD->getBody()->getLocStart());
365   unsigned Cnt = CD->getContextParamPosition();
366   I = FO.S->captures().begin();
367   for (auto *FD : RD->fields()) {
368     // Do not map arguments if we emit function with non-original types.
369     Address LocalAddr(Address::invalid());
370     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
371       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
372                                                              TargetArgs[Cnt]);
373     } else {
374       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
375     }
376     // If we are capturing a pointer by copy we don't need to do anything, just
377     // use the value that we get from the arguments.
378     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
379       const VarDecl *CurVD = I->getCapturedVar();
380       // If the variable is a reference we need to materialize it here.
381       if (CurVD->getType()->isReferenceType()) {
382         Address RefAddr = CGF.CreateMemTemp(
383             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
384         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
385                               /*Volatile=*/false, CurVD->getType());
386         LocalAddr = RefAddr;
387       }
388       if (!FO.RegisterCastedArgsOnly)
389         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
390       ++Cnt;
391       ++I;
392       continue;
393     }
394 
395     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
396                                         AlignmentSource::Decl);
397     if (FD->hasCapturedVLAType()) {
398       if (FO.UIntPtrCastRequired) {
399         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
400                                                           Args[Cnt]->getName(),
401                                                           ArgLVal),
402                                      FD->getType(), AlignmentSource::Decl);
403       }
404       auto *ExprArg =
405           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
406       auto VAT = FD->getCapturedVLAType();
407       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
408     } else if (I->capturesVariable()) {
409       auto *Var = I->getCapturedVar();
410       QualType VarTy = Var->getType();
411       Address ArgAddr = ArgLVal.getAddress();
412       if (!VarTy->isReferenceType()) {
413         if (ArgLVal.getType()->isLValueReferenceType()) {
414           ArgAddr = CGF.EmitLoadOfReference(
415               ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
416         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
417           assert(ArgLVal.getType()->isPointerType());
418           ArgAddr = CGF.EmitLoadOfPointer(
419               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
420         }
421       }
422       if (!FO.RegisterCastedArgsOnly) {
423         LocalAddrs.insert(
424             {Args[Cnt],
425              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
426       }
427     } else if (I->capturesVariableByCopy()) {
428       assert(!FD->getType()->isAnyPointerType() &&
429              "Not expecting a captured pointer.");
430       auto *Var = I->getCapturedVar();
431       QualType VarTy = Var->getType();
432       LocalAddrs.insert(
433           {Args[Cnt],
434            {Var,
435             FO.UIntPtrCastRequired
436                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
437                                        ArgLVal, VarTy->isReferenceType())
438                 : ArgLVal.getAddress()}});
439     } else {
440       // If 'this' is captured, load it into CXXThisValue.
441       assert(I->capturesThis());
442       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
443                          .getScalarVal();
444       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
445     }
446     ++Cnt;
447     ++I;
448   }
449 
450   return F;
451 }
452 
/// Generates the outlined function for captured statement \p S. When debug
/// info is enabled, a "_debug__" function with original parameter types is
/// emitted first, and a wrapper with runtime-friendly (uintptr-cast)
/// parameters is emitted around it; the wrapper is what gets returned.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  // Make the captured variables and VLA sizes visible to the body emission.
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Emit the wrapper: same helper name, uintptr-cast parameters, and only
  // casted args registered. It simply forwards to the debug function.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  // Reuse the scratch containers for the wrapper's prologue.
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  // Load each wrapper argument (through its registered local address, VLA
  // size, or plain local) to build the call to the debug function.
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
527 
528 //===----------------------------------------------------------------------===//
529 //                              OpenMP Directive Emission
530 //===----------------------------------------------------------------------===//
/// Emits an element-by-element copy loop from \p SrcAddr to \p DestAddr for
/// an array of type \p OriginalType, invoking \p CopyGen on each
/// (destination, source) element pair. Handles VLAs via emitArrayLength.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI =
    Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
    Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
    Address(DestElementPHI,
            DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge incoming values must use the block CopyGen finished in,
  // which may differ from BodyBB.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
591 
/// Emits a copy of \p OriginalType data from \p SrcAddr to \p DestAddr by
/// evaluating the \p Copy expression with the pseudo variables \p SrcVD and
/// \p DestVD remapped to the actual addresses. Arrays with a plain
/// assignment copy expression are copied with a single aggregate assign.
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
629 
/// Emits initialization of the private copies for the 'firstprivate' clause
/// variables of directive \p D and registers them in \p PrivateScope.
/// Variables captured by value (and not also lastprivate) are skipped, since
/// the capture itself already provides the private copy.
/// \returns true if some firstprivate variable is also lastprivate.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect the canonical decls of all lastprivate variables so we can detect
  // firstprivate/lastprivate overlap.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    // NOTE(review): the loop variable below shadows the parameter 'D'.
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // IRef (original variable refs) and InitsRef (init pseudo-variables) are
    // advanced in lockstep with the private copies.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // Captured by value and not lastprivate: the capture already acts as
      // the private copy, so no extra emission is needed.
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Emit each original variable only once even if it appears in several
      // firstprivate clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
722 
723 void CodeGenFunction::EmitOMPPrivateClause(
724     const OMPExecutableDirective &D,
725     CodeGenFunction::OMPPrivateScope &PrivateScope) {
726   if (!HaveInsertPoint())
727     return;
728   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
729   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
730     auto IRef = C->varlist_begin();
731     for (auto IInit : C->private_copies()) {
732       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
733       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
734         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
735         bool IsRegistered =
736             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
737               // Emit private VarDecl with copy init.
738               EmitDecl(*VD);
739               return GetAddrOfLocalVar(VD);
740             });
741         assert(IsRegistered && "private var already registered as private");
742         // Silence the warning about unused variable.
743         (void)IsRegistered;
744       }
745       ++IRef;
746     }
747   }
748 }
749 
/// Emit the assignments required by 'copyin' clauses: each non-master
/// thread's threadprivate copy of a variable is initialized from the master
/// thread's copy.  Returns true if at least one copy was emitted, signaling
/// the caller that the implicit barrier sketched below must follow.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      // Copy each variable only once even if it appears in several clauses.
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          // Drop the local mapping so subsequent references resolve to the
          // thread's own copy rather than the captured master address.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
            Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                        : CGM.GetAddrOfGlobal(VD),
                    getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // Master and private addresses are equal only on the master thread,
          // so the compare below branches around the whole copy region there.
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
815 
/// Set up privatization for 'lastprivate' variables: record the address of
/// each original variable (needed later to copy the final value back) and
/// emit a private copy where required.  Returns true if the directive has at
/// least one 'lastprivate' clause.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // For simd directives, collect the loop control variables: they are
  // privatized elsewhere, so no extra private copy is emitted for them here.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Map the destination decl to the original variable's address so the
        // final copy-back can find it.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
873 
/// Copy the values of 'lastprivate' private copies back into the original
/// variables.  When \p IsLastIterCond is non-null the copies are guarded by
/// it (only the thread that executed the last iteration copies back); loop
/// counters are first advanced to their final values unless \p NoFinals.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  // Pair each loop counter with its "final value" expression; with NoFinals
  // the counters are instead marked as already handled so no copy is emitted.
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // For reference-typed privates, copy from the referenced storage.
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
943 
/// Emit initialization for 'reduction' clause variables: allocate and
/// initialize each private reduction copy and register, in \p PrivateScope,
/// the mappings for the base variable as well as the LHS/RHS helper decls
/// used by the combiner expressions.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Flatten the per-clause lists into parallel arrays consumed by
  // ReductionCodeGen below.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    // How LHS/RHS are mapped depends on the form of the shared expression:
    // array section, array subscript, or plain variable.
    if (isa<OMPArraySectionExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if (isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        // Cast to the element type of the helper decl so the combiner can
        // iterate elements.
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1046 
1047 void CodeGenFunction::EmitOMPReductionClauseFinal(
1048     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1049   if (!HaveInsertPoint())
1050     return;
1051   llvm::SmallVector<const Expr *, 8> Privates;
1052   llvm::SmallVector<const Expr *, 8> LHSExprs;
1053   llvm::SmallVector<const Expr *, 8> RHSExprs;
1054   llvm::SmallVector<const Expr *, 8> ReductionOps;
1055   bool HasAtLeastOneReduction = false;
1056   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1057     HasAtLeastOneReduction = true;
1058     Privates.append(C->privates().begin(), C->privates().end());
1059     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1060     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1061     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1062   }
1063   if (HasAtLeastOneReduction) {
1064     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1065                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1066                       D.getDirectiveKind() == OMPD_simd;
1067     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1068     // Emit nowait reduction if nowait clause is present or directive is a
1069     // parallel directive (it always has implicit barrier).
1070     CGM.getOpenMPRuntime().emitReduction(
1071         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1072         {WithNowait, SimpleReduction, ReductionKind});
1073   }
1074 }
1075 
1076 static void emitPostUpdateForReductionClause(
1077     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1078     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1079   if (!CGF.HaveInsertPoint())
1080     return;
1081   llvm::BasicBlock *DoneBB = nullptr;
1082   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1083     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1084       if (!DoneBB) {
1085         if (auto *Cond = CondGen(CGF)) {
1086           // If the first post-update expression is found, emit conditional
1087           // block if it was requested.
1088           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1089           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1090           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1091           CGF.EmitBlock(ThenBB);
1092         }
1093       }
1094       CGF.EmitIgnoredExpr(PostUpdate);
1095     }
1096   }
1097   if (DoneBB)
1098     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1099 }
1100 
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.  Implementations append extra llvm::Values to
/// the captured-variable list passed to the outlined function.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
1110 
/// Shared emission path for 'parallel' and combined parallel directives:
/// outlines the region, applies num_threads/proc_bind/if clauses, and emits
/// the runtime call that forks the parallel region.
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  // Only an 'if' clause with no modifier or the 'parallel' modifier applies
  // to the parallel region itself.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}
1150 
/// No-op CodeGenBoundParametersTy callback for directives that have no
/// distribute bounds to forward to the outlined parallel function.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1154 
/// Emit code for '#pragma omp parallel': outline the region body, emitting
/// copyin/firstprivate/private/reduction clauses inside it in the order
/// required for correct initialization.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  // Reduction post-updates run unconditionally after the parallel region.
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
1180 
/// Emit one iteration of an OpenMP loop: refresh the loop counters and
/// linear variables from the iteration variable, then emit the loop body
/// with 'continue' redirected to a local continuation block and 'break'
/// (where allowed) to \p LoopExit.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (auto I : D.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *U : C->updates())
      EmitIgnoredExpr(U);
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1203 
/// Emit the innermost "while (LoopCond) { Body; Inc; }" structure of an
/// OpenMP loop as explicit basic blocks: cond -> body -> inc -> cond, with a
/// separate cleanup staging block when \p RequiresCleanup.  \p BodyGen emits
/// the body; \p PostIncGen runs right after the increment (e.g. for ordered
/// region bookkeeping).
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the cleanup staging block.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1252 
/// Emit initial values for the privates of 'linear' clauses and precompute
/// non-constant linear steps.  Returns true if any linear variable was
/// emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      HasLinears = true;
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init refers directly to the original variable: initialize the
        // private copy from the (possibly captured) original.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      // NOTE(review): cast<> asserts and never returns null, so this condition
      // is always true when reached; dyn_cast may have been intended — confirm.
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
1287 
/// Emit the final values of 'linear' variables back into the originals,
/// optionally guarded by a condition produced by \p CondGen (the guard block
/// is created lazily at the first final expression).
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      // Privatize the variable to its original address so the final
      // expression stores into the original storage.
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1325 
/// Emit llvm alignment assumptions for the pointers listed in 'aligned'
/// clauses, using the clause's alignment expression or the target's default
/// simd alignment when none is given.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      // The alignment expression is a compile-time constant; Sema guarantees
      // it folds to a ConstantInt here.
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}
1358 
/// Privatize the loop control variables of an OpenMP loop: map each original
/// counter decl to a freshly allocated (uninitialized) private copy, and,
/// when the original is addressable here, also map the private decl back to
/// the original's address.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    // Map the private counter decl to the original variable's address when the
    // original is a local, a capture, or a global visible from here.
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}
1390 
1391 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1392                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1393                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1394   if (!CGF.HaveInsertPoint())
1395     return;
1396   {
1397     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1398     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1399     (void)PreCondScope.Privatize();
1400     // Get initial values of real counters.
1401     for (auto I : S.inits()) {
1402       CGF.EmitIgnoredExpr(I);
1403     }
1404   }
1405   // Check that loop is executed at least one time.
1406   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1407 }
1408 
1409 void CodeGenFunction::EmitOMPLinearClause(
1410     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1411   if (!HaveInsertPoint())
1412     return;
1413   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1414   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1415     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1416     for (auto *C : LoopDirective->counters()) {
1417       SIMDLCVs.insert(
1418           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1419     }
1420   }
1421   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1422     auto CurPrivate = C->privates().begin();
1423     for (auto *E : C->varlists()) {
1424       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1425       auto *PrivateVD =
1426           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1427       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1428         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1429           // Emit private VarDecl with copy init.
1430           EmitVarDecl(*PrivateVD);
1431           return GetAddrOfLocalVar(PrivateVD);
1432         });
1433         assert(IsRegistered && "linear var already registered as private");
1434         // Silence the warning about unused variable.
1435         (void)IsRegistered;
1436       } else
1437         EmitVarDecl(*PrivateVD);
1438       ++CurPrivate;
1439     }
1440   }
1441 }
1442 
1443 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1444                                      const OMPExecutableDirective &D,
1445                                      bool IsMonotonic) {
1446   if (!CGF.HaveInsertPoint())
1447     return;
1448   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1449     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1450                                  /*ignoreResult=*/true);
1451     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1452     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1453     // In presence of finite 'safelen', it may be unsafe to mark all
1454     // the memory instructions parallel, because loop-carried
1455     // dependences of 'safelen' iterations are possible.
1456     if (!IsMonotonic)
1457       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1458   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1459     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1460                                  /*ignoreResult=*/true);
1461     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1462     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1463     // In presence of finite 'safelen', it may be unsafe to mark all
1464     // the memory instructions parallel, because loop-carried
1465     // dependences of 'safelen' iterations are possible.
1466     CGF.LoopStack.setParallel(false);
1467   }
1468 }
1469 
// Initialize loop metadata for a simd region: enable vectorization, mark the
// loop parallel unless the schedule is monotonic, and apply any
// 'simdlen'/'safelen' clauses.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  // 'simdlen'/'safelen' set the vector width and may veto the parallel mark.
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}
1477 
// Emit the final updates of the loop counters after a simd loop: for each
// counter that is observable after the construct (a local, a capture, a
// global, or a captured expression decl) the final expression is evaluated
// with the counter temporarily privatized to the appropriate address.
// \a CondGen may return a condition; if it does (checked at the first
// emitted update), all updates are wrapped in a conditional block.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // finals(), counters() and private_counters() are parallel sequences.
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Choose the address the final expression should write through: the
      // captured expression's underlying lvalue, or the private copy.
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      // Temporarily map the original counter to OrigAddr while emitting F.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1523 
// Emit the loop body of \a S followed by a debug stop point. Matches the
// CodeGenLoopTy callback shape expected by the outer-loop emitters.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
1530 
// Emit code for '#pragma omp simd': a single (possibly precondition-guarded)
// loop over the normalized iteration space, with private/linear/reduction/
// lastprivate clause handling, emitted as an inlined region.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Mark the loop for vectorization and apply simdlen/safelen clauses.
    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    (void)CGF.EmitOMPLinearClauseInit(S);
    {
      // Privatize counters and clause variables for the duration of the loop.
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    // Final updates for 'linear' variables happen outside the private scope.
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
1612 
// Emit the outer dispatch loop shared by worksharing and distribute
// constructs: repeatedly obtain a chunk (statically, or from the runtime for
// dynamic/ordered schedules), run the inner loop over it via \a CodeGenLoop,
// and advance to the next chunk until none remain.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // "Another chunk available?" — computed differently per schedule kind.
  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // Ask the runtime for the next chunk (__kmpc_dispatch_next).
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
1717 
1718 void CodeGenFunction::EmitOMPForOuterLoop(
1719     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1720     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1721     const OMPLoopArguments &LoopArgs,
1722     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1723   auto &RT = CGM.getOpenMPRuntime();
1724 
1725   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1726   const bool DynamicOrOrdered =
1727       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1728 
1729   assert((Ordered ||
1730           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1731                                  LoopArgs.Chunk != nullptr)) &&
1732          "static non-chunked schedule does not need outer loop");
1733 
1734   // Emit outer loop.
1735   //
1736   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1737   // When schedule(dynamic,chunk_size) is specified, the iterations are
1738   // distributed to threads in the team in chunks as the threads request them.
1739   // Each thread executes a chunk of iterations, then requests another chunk,
1740   // until no chunks remain to be distributed. Each chunk contains chunk_size
1741   // iterations, except for the last chunk to be distributed, which may have
1742   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1743   //
1744   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1745   // to threads in the team in chunks as the executing threads request them.
1746   // Each thread executes a chunk of iterations, then requests another chunk,
1747   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1748   // each chunk is proportional to the number of unassigned iterations divided
1749   // by the number of threads in the team, decreasing to 1. For a chunk_size
1750   // with value k (greater than 1), the size of each chunk is determined in the
1751   // same way, with the restriction that the chunks do not contain fewer than k
1752   // iterations (except for the last chunk to be assigned, which may have fewer
1753   // than k iterations).
1754   //
1755   // When schedule(auto) is specified, the decision regarding scheduling is
1756   // delegated to the compiler and/or runtime system. The programmer gives the
1757   // implementation the freedom to choose any possible mapping of iterations to
1758   // threads in the team.
1759   //
1760   // When schedule(runtime) is specified, the decision regarding scheduling is
1761   // deferred until run time, and the schedule and chunk size are taken from the
1762   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1763   // implementation defined
1764   //
1765   // while(__kmpc_dispatch_next(&LB, &UB)) {
1766   //   idx = LB;
1767   //   while (idx <= UB) { BODY; ++idx;
1768   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1769   //   } // inner loop
1770   // }
1771   //
1772   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1773   // When schedule(static, chunk_size) is specified, iterations are divided into
1774   // chunks of size chunk_size, and the chunks are assigned to the threads in
1775   // the team in a round-robin fashion in the order of the thread number.
1776   //
1777   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1778   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1779   //   LB = LB + ST;
1780   //   UB = UB + ST;
1781   // }
1782   //
1783 
1784   const Expr *IVExpr = S.getIterationVariable();
1785   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1786   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1787 
1788   if (DynamicOrOrdered) {
1789     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1790     llvm::Value *LBVal = DispatchBounds.first;
1791     llvm::Value *UBVal = DispatchBounds.second;
1792     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1793                                                              LoopArgs.Chunk};
1794     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1795                            IVSigned, Ordered, DipatchRTInputValues);
1796   } else {
1797     CGOpenMPRuntime::StaticRTInput StaticInit(
1798         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1799         LoopArgs.ST, LoopArgs.Chunk);
1800     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1801                          ScheduleKind, StaticInit);
1802   }
1803 
1804   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1805                                     const unsigned IVSize,
1806                                     const bool IVSigned) {
1807     if (Ordered) {
1808       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1809                                                             IVSigned);
1810     }
1811   };
1812 
1813   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1814                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1815   OuterLoopArgs.IncExpr = S.getInc();
1816   OuterLoopArgs.Init = S.getInit();
1817   OuterLoopArgs.Cond = S.getCond();
1818   OuterLoopArgs.NextLB = S.getNextLowerBound();
1819   OuterLoopArgs.NextUB = S.getNextUpperBound();
1820   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1821                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1822 }
1823 
// No-op CodeGenOrderedTy callback for loops that never need per-iteration
// 'ordered' runtime calls (e.g. distribute loops, which are never dynamic).
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
1826 
1827 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1828     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1829     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1830     const CodeGenLoopTy &CodeGenLoopContent) {
1831 
1832   auto &RT = CGM.getOpenMPRuntime();
1833 
1834   // Emit outer loop.
1835   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1836   // dynamic
1837   //
1838 
1839   const Expr *IVExpr = S.getIterationVariable();
1840   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1841   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1842 
1843   CGOpenMPRuntime::StaticRTInput StaticInit(
1844       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1845       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1846   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1847 
1848   // for combined 'distribute' and 'for' the increment expression of distribute
1849   // is store in DistInc. For 'distribute' alone, it is in Inc.
1850   Expr *IncExpr;
1851   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1852     IncExpr = S.getDistInc();
1853   else
1854     IncExpr = S.getInc();
1855 
1856   // this routine is shared by 'omp distribute parallel for' and
1857   // 'omp distribute': select the right EUB expression depending on the
1858   // directive
1859   OMPLoopArguments OuterLoopArgs;
1860   OuterLoopArgs.LB = LoopArgs.LB;
1861   OuterLoopArgs.UB = LoopArgs.UB;
1862   OuterLoopArgs.ST = LoopArgs.ST;
1863   OuterLoopArgs.IL = LoopArgs.IL;
1864   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1865   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1866                           ? S.getCombinedEnsureUpperBound()
1867                           : S.getEnsureUpperBound();
1868   OuterLoopArgs.IncExpr = IncExpr;
1869   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1870                            ? S.getCombinedInit()
1871                            : S.getInit();
1872   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1873                            ? S.getCombinedCond()
1874                            : S.getCond();
1875   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1876                              ? S.getCombinedNextLowerBound()
1877                              : S.getNextLowerBound();
1878   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1879                              ? S.getCombinedNextUpperBound()
1880                              : S.getNextUpperBound();
1881 
1882   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1883                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1884                    emitEmptyOrdered);
1885 }
1886 
1887 /// Emit a helper variable and return corresponding lvalue.
1888 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1889                                const DeclRefExpr *Helper) {
1890   auto VDecl = cast<VarDecl>(Helper->getDecl());
1891   CGF.EmitVarDecl(*VDecl);
1892   return CGF.EmitLValue(Helper);
1893 }
1894 
// Emit the LB/UB helper variables for the inner 'for' of a combined
// 'distribute parallel for' and seed them from the enclosing distribute
// chunk's bounds (PrevLB/PrevUB), converted to the iteration variable's type.
// Returns the (LB, UB) lvalues.
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
  // The previous bounds may have a different type; convert to the iteration
  // variable's type before storing.
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
1926 
1927 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1928 /// we need to use the LB and UB expressions generated by the worksharing
1929 /// code generation support, whereas in non combined situations we would
1930 /// just emit 0 and the LastIteration expression
1931 /// This function is necessary due to the difference of the LB and UB
1932 /// types for the RT emission routines for 'for_static_init' and
1933 /// 'for_dispatch_init'
1934 static std::pair<llvm::Value *, llvm::Value *>
1935 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1936                                         const OMPExecutableDirective &S,
1937                                         Address LB, Address UB) {
1938   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1939   const Expr *IVExpr = LS.getIterationVariable();
1940   // when implementing a dynamic schedule for a 'for' combined with a
1941   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1942   // is not normalized as each team only executes its own assigned
1943   // distribute chunk
1944   QualType IteratorTy = IVExpr->getType();
1945   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1946                                             SourceLocation());
1947   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1948                                             SourceLocation());
1949   return {LBVal, UBVal};
1950 }
1951 
1952 static void emitDistributeParallelForDistributeInnerBoundParams(
1953     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1954     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1955   const auto &Dir = cast<OMPLoopDirective>(S);
1956   LValue LB =
1957       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1958   auto LBCast = CGF.Builder.CreateIntCast(
1959       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1960   CapturedVars.push_back(LBCast);
1961   LValue UB =
1962       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1963 
1964   auto UBCast = CGF.Builder.CreateIntCast(
1965       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1966   CapturedVars.push_back(UBCast);
1967 }
1968 
// Emit the inner 'parallel for' of a combined 'distribute parallel for':
// an outlined parallel region whose body is the worksharing loop bounded by
// the distribute chunk (PrevEUB and the combined bound parameters).
// NOTE: LoopExit is unused here; the parameter exists so this function
// matches the CodeGenLoopTy callback signature.
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S, OMPD_for, CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
1984 
// Emit code for '#pragma omp distribute parallel for': the distribute loop
// drives chunking (using DistInc as its increment) and each chunk's body is
// the inner 'parallel for' emitted by emitInnerParallelForWhenCombined.
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  // Cancellation region for the construct; HasCancel is false, so no cancel
  // handling is generated.
  OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
                                  /*HasCancel=*/false);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              /*HasCancel=*/false);
}
1997 
1998 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
1999     const OMPDistributeParallelForSimdDirective &S) {
2000   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2001   CGM.getOpenMPRuntime().emitInlinedDirective(
2002       *this, OMPD_distribute_parallel_for_simd,
2003       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2004         OMPLoopScope PreInitScope(CGF, S);
2005         CGF.EmitStmt(
2006             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2007       });
2008 }
2009 
2010 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2011     const OMPDistributeSimdDirective &S) {
2012   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2013   CGM.getOpenMPRuntime().emitInlinedDirective(
2014       *this, OMPD_distribute_simd,
2015       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2016         OMPLoopScope PreInitScope(CGF, S);
2017         CGF.EmitStmt(
2018             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2019       });
2020 }
2021 
2022 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
2023     const OMPTargetParallelForSimdDirective &S) {
2024   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2025   CGM.getOpenMPRuntime().emitInlinedDirective(
2026       *this, OMPD_target_parallel_for_simd,
2027       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2028         OMPLoopScope PreInitScope(CGF, S);
2029         CGF.EmitStmt(
2030             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2031       });
2032 }
2033 
2034 void CodeGenFunction::EmitOMPTargetSimdDirective(
2035     const OMPTargetSimdDirective &S) {
2036   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2037   CGM.getOpenMPRuntime().emitInlinedDirective(
2038       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2039         OMPLoopScope PreInitScope(CGF, S);
2040         CGF.EmitStmt(
2041             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2042       });
2043 }
2044 
2045 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2046     const OMPTeamsDistributeSimdDirective &S) {
2047   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2048   CGM.getOpenMPRuntime().emitInlinedDirective(
2049       *this, OMPD_teams_distribute_simd,
2050       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2051         OMPLoopScope PreInitScope(CGF, S);
2052         CGF.EmitStmt(
2053             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2054       });
2055 }
2056 
2057 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2058     const OMPTeamsDistributeParallelForSimdDirective &S) {
2059   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2060   CGM.getOpenMPRuntime().emitInlinedDirective(
2061       *this, OMPD_teams_distribute_parallel_for_simd,
2062       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2063         OMPLoopScope PreInitScope(CGF, S);
2064         CGF.EmitStmt(
2065             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2066       });
2067 }
2068 
2069 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2070     const OMPTeamsDistributeParallelForDirective &S) {
2071   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2072   CGM.getOpenMPRuntime().emitInlinedDirective(
2073       *this, OMPD_teams_distribute_parallel_for,
2074       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2075         OMPLoopScope PreInitScope(CGF, S);
2076         CGF.EmitStmt(
2077             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2078       });
2079 }
2080 
2081 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2082     const OMPTargetTeamsDistributeDirective &S) {
2083   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2084   CGM.getOpenMPRuntime().emitInlinedDirective(
2085       *this, OMPD_target_teams_distribute,
2086       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2087         CGF.EmitStmt(
2088             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2089       });
2090 }
2091 
2092 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2093     const OMPTargetTeamsDistributeParallelForDirective &S) {
2094   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2095   CGM.getOpenMPRuntime().emitInlinedDirective(
2096       *this, OMPD_target_teams_distribute_parallel_for,
2097       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2098         CGF.EmitStmt(
2099             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2100       });
2101 }
2102 
2103 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2104     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2105   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2106   CGM.getOpenMPRuntime().emitInlinedDirective(
2107       *this, OMPD_target_teams_distribute_parallel_for_simd,
2108       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2109         CGF.EmitStmt(
2110             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2111       });
2112 }
2113 
2114 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2115     const OMPTargetTeamsDistributeSimdDirective &S) {
2116   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2117   CGM.getOpenMPRuntime().emitInlinedDirective(
2118       *this, OMPD_target_teams_distribute_simd,
2119       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2120         CGF.EmitStmt(
2121             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2122       });
2123 }
2124 
namespace {
  /// Bundles a schedule() clause kind together with its two optional
  /// modifiers so the triple can be passed around as one value.
  struct ScheduleKindModifiersTy {
    OpenMPScheduleClauseKind Kind;   // Schedule kind (static, dynamic, ...).
    OpenMPScheduleClauseModifier M1; // First schedule modifier, if any.
    OpenMPScheduleClauseModifier M2; // Second schedule modifier, if any.
    ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                            OpenMPScheduleClauseModifier M1,
                            OpenMPScheduleClauseModifier M2)
        : Kind(Kind), M1(M1), M2(M2) {}
  };
} // namespace
2136 
2137 bool CodeGenFunction::EmitOMPWorksharingLoop(
2138     const OMPLoopDirective &S, Expr *EUB,
2139     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2140     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2141   // Emit the loop iteration variable.
2142   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2143   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2144   EmitVarDecl(*IVDecl);
2145 
2146   // Emit the iterations count variable.
2147   // If it is not a variable, Sema decided to calculate iterations count on each
2148   // iteration (e.g., it is foldable into a constant).
2149   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2150     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2151     // Emit calculation of the iterations count.
2152     EmitIgnoredExpr(S.getCalcLastIteration());
2153   }
2154 
2155   auto &RT = CGM.getOpenMPRuntime();
2156 
2157   bool HasLastprivateClause;
2158   // Check pre-condition.
2159   {
2160     OMPLoopScope PreInitScope(*this, S);
2161     // Skip the entire loop if we don't meet the precondition.
2162     // If the condition constant folds and can be elided, avoid emitting the
2163     // whole loop.
2164     bool CondConstant;
2165     llvm::BasicBlock *ContBlock = nullptr;
2166     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2167       if (!CondConstant)
2168         return false;
2169     } else {
2170       auto *ThenBlock = createBasicBlock("omp.precond.then");
2171       ContBlock = createBasicBlock("omp.precond.end");
2172       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2173                   getProfileCount(&S));
2174       EmitBlock(ThenBlock);
2175       incrementProfileCounter(&S);
2176     }
2177 
2178     bool Ordered = false;
2179     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2180       if (OrderedClause->getNumForLoops())
2181         RT.emitDoacrossInit(*this, S);
2182       else
2183         Ordered = true;
2184     }
2185 
2186     llvm::DenseSet<const Expr *> EmittedFinals;
2187     emitAlignedClause(*this, S);
2188     bool HasLinears = EmitOMPLinearClauseInit(S);
2189     // Emit helper vars inits.
2190 
2191     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2192     LValue LB = Bounds.first;
2193     LValue UB = Bounds.second;
2194     LValue ST =
2195         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2196     LValue IL =
2197         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2198 
2199     // Emit 'then' code.
2200     {
2201       OMPPrivateScope LoopScope(*this);
2202       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2203         // Emit implicit barrier to synchronize threads and avoid data races on
2204         // initialization of firstprivate variables and post-update of
2205         // lastprivate variables.
2206         CGM.getOpenMPRuntime().emitBarrierCall(
2207             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2208             /*ForceSimpleCall=*/true);
2209       }
2210       EmitOMPPrivateClause(S, LoopScope);
2211       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2212       EmitOMPReductionClauseInit(S, LoopScope);
2213       EmitOMPPrivateLoopCounters(S, LoopScope);
2214       EmitOMPLinearClause(S, LoopScope);
2215       (void)LoopScope.Privatize();
2216 
2217       // Detect the loop schedule kind and chunk.
2218       llvm::Value *Chunk = nullptr;
2219       OpenMPScheduleTy ScheduleKind;
2220       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2221         ScheduleKind.Schedule = C->getScheduleKind();
2222         ScheduleKind.M1 = C->getFirstScheduleModifier();
2223         ScheduleKind.M2 = C->getSecondScheduleModifier();
2224         if (const auto *Ch = C->getChunkSize()) {
2225           Chunk = EmitScalarExpr(Ch);
2226           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2227                                        S.getIterationVariable()->getType(),
2228                                        S.getLocStart());
2229         }
2230       }
2231       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2232       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2233       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2234       // If the static schedule kind is specified or if the ordered clause is
2235       // specified, and if no monotonic modifier is specified, the effect will
2236       // be as if the monotonic modifier was specified.
2237       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2238                                 /* Chunked */ Chunk != nullptr) &&
2239           !Ordered) {
2240         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2241           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2242         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2243         // When no chunk_size is specified, the iteration space is divided into
2244         // chunks that are approximately equal in size, and at most one chunk is
2245         // distributed to each thread. Note that the size of the chunks is
2246         // unspecified in this case.
2247         CGOpenMPRuntime::StaticRTInput StaticInit(
2248             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2249             UB.getAddress(), ST.getAddress());
2250         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2251                              ScheduleKind, StaticInit);
2252         auto LoopExit =
2253             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2254         // UB = min(UB, GlobalUB);
2255         EmitIgnoredExpr(S.getEnsureUpperBound());
2256         // IV = LB;
2257         EmitIgnoredExpr(S.getInit());
2258         // while (idx <= UB) { BODY; ++idx; }
2259         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2260                          S.getInc(),
2261                          [&S, LoopExit](CodeGenFunction &CGF) {
2262                            CGF.EmitOMPLoopBody(S, LoopExit);
2263                            CGF.EmitStopPoint(&S);
2264                          },
2265                          [](CodeGenFunction &) {});
2266         EmitBlock(LoopExit.getBlock());
2267         // Tell the runtime we are done.
2268         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2269           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2270                                                          S.getDirectiveKind());
2271         };
2272         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2273       } else {
2274         const bool IsMonotonic =
2275             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2276             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2277             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2278             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2279         // Emit the outer loop, which requests its work chunk [LB..UB] from
2280         // runtime and runs the inner loop to process it.
2281         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2282                                              ST.getAddress(), IL.getAddress(),
2283                                              Chunk, EUB);
2284         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2285                             LoopArguments, CGDispatchBounds);
2286       }
2287       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2288         EmitOMPSimdFinal(S,
2289                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2290                            return CGF.Builder.CreateIsNotNull(
2291                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2292                          });
2293       }
2294       EmitOMPReductionClauseFinal(
2295           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2296                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2297                  : /*Parallel only*/ OMPD_parallel);
2298       // Emit post-update of the reduction variables if IsLastIter != 0.
2299       emitPostUpdateForReductionClause(
2300           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2301             return CGF.Builder.CreateIsNotNull(
2302                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2303           });
2304       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2305       if (HasLastprivateClause)
2306         EmitOMPLastprivateClauseFinal(
2307             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2308             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2309     }
2310     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2311       return CGF.Builder.CreateIsNotNull(
2312           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2313     });
2314     // We're now done with the loop, so jump to the continuation block.
2315     if (ContBlock) {
2316       EmitBranch(ContBlock);
2317       EmitBlock(ContBlock, true);
2318     }
2319   }
2320   return HasLastprivateClause;
2321 }
2322 
2323 /// The following two functions generate expressions for the loop lower
2324 /// and upper bounds in case of static and dynamic (dispatch) schedule
2325 /// of the associated 'for' or 'distribute' loop.
2326 static std::pair<LValue, LValue>
2327 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2328   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2329   LValue LB =
2330       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2331   LValue UB =
2332       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2333   return {LB, UB};
2334 }
2335 
2336 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2337 /// consider the lower and upper bound expressions generated by the
2338 /// worksharing loop support, but we use 0 and the iteration space size as
2339 /// constants
2340 static std::pair<llvm::Value *, llvm::Value *>
2341 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2342                           Address LB, Address UB) {
2343   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2344   const Expr *IVExpr = LS.getIterationVariable();
2345   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2346   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2347   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2348   return {LBVal, UBVal};
2349 }
2350 
2351 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2352   bool HasLastprivates = false;
2353   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2354                                           PrePostActionTy &) {
2355     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2356     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2357                                                  emitForLoopBounds,
2358                                                  emitDispatchForLoopBounds);
2359   };
2360   {
2361     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2362     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2363                                                 S.hasCancel());
2364   }
2365 
2366   // Emit an implicit barrier at the end.
2367   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2368     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2369   }
2370 }
2371 
2372 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2373   bool HasLastprivates = false;
2374   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2375                                           PrePostActionTy &) {
2376     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2377                                                  emitForLoopBounds,
2378                                                  emitDispatchForLoopBounds);
2379   };
2380   {
2381     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2382     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2383   }
2384 
2385   // Emit an implicit barrier at the end.
2386   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2387     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2388   }
2389 }
2390 
2391 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2392                                 const Twine &Name,
2393                                 llvm::Value *Init = nullptr) {
2394   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2395   if (Init)
2396     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2397   return LVal;
2398 }
2399 
/// Emits the body shared by 'sections' and 'parallel sections' directives:
/// the region is lowered as a statically scheduled worksharing loop over the
/// section indices, where each iteration dispatches to the matching section
/// statement through a switch.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // CS is non-null when several sections form a compound statement; a null CS
  // means the region is a single statement making up exactly one section.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // The global upper bound is <number of sections> - 1 (0 when the region
    // is a single statement).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Build opaque references to IV and UB so the loop condition and
    // increment expressions below can be constructed over the temporaries.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single-statement region: emit it as the only section (case 0).
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Both 'sections' and 'parallel sections' may contain 'cancel'; track it so
  // cancellation exits are emitted correctly.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2539 
2540 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2541   {
2542     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2543     EmitSections(S);
2544   }
2545   // Emit an implicit barrier at the end.
2546   if (!S.getSingleClause<OMPNowaitClause>()) {
2547     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2548                                            OMPD_sections);
2549   }
2550 }
2551 
2552 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2553   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2554     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2555   };
2556   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2557   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2558                                               S.hasCancel());
2559 }
2560 
2561 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2562   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2563   llvm::SmallVector<const Expr *, 8> DestExprs;
2564   llvm::SmallVector<const Expr *, 8> SrcExprs;
2565   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2566   // Check if there are any 'copyprivate' clauses associated with this
2567   // 'single' construct.
2568   // Build a list of copyprivate variables along with helper expressions
2569   // (<source>, <destination>, <destination>=<source> expressions)
2570   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2571     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2572     DestExprs.append(C->destination_exprs().begin(),
2573                      C->destination_exprs().end());
2574     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2575     AssignmentOps.append(C->assignment_ops().begin(),
2576                          C->assignment_ops().end());
2577   }
2578   // Emit code for 'single' region along with 'copyprivate' clauses
2579   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2580     Action.Enter(CGF);
2581     OMPPrivateScope SingleScope(CGF);
2582     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2583     CGF.EmitOMPPrivateClause(S, SingleScope);
2584     (void)SingleScope.Privatize();
2585     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2586   };
2587   {
2588     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2589     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2590                                             CopyprivateVars, DestExprs,
2591                                             SrcExprs, AssignmentOps);
2592   }
2593   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2594   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2595   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2596     CGM.getOpenMPRuntime().emitBarrierCall(
2597         *this, S.getLocStart(),
2598         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2599   }
2600 }
2601 
2602 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2603   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2604     Action.Enter(CGF);
2605     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2606   };
2607   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2608   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2609 }
2610 
2611 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2612   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2613     Action.Enter(CGF);
2614     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2615   };
2616   Expr *Hint = nullptr;
2617   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2618     Hint = HintClause->getHint();
2619   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2620   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2621                                             S.getDirectiveName().getAsString(),
2622                                             CodeGen, S.getLocStart(), Hint);
2623 }
2624 
2625 void CodeGenFunction::EmitOMPParallelForDirective(
2626     const OMPParallelForDirective &S) {
2627   // Emit directive as a combined directive that consists of two implicit
2628   // directives: 'parallel' with 'for' directive.
2629   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2630     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2631     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2632                                emitDispatchForLoopBounds);
2633   };
2634   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2635                                  emitEmptyBoundParameters);
2636 }
2637 
2638 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2639     const OMPParallelForSimdDirective &S) {
2640   // Emit directive as a combined directive that consists of two implicit
2641   // directives: 'parallel' with 'for' directive.
2642   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2643     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2644                                emitDispatchForLoopBounds);
2645   };
2646   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2647                                  emitEmptyBoundParameters);
2648 }
2649 
2650 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2651     const OMPParallelSectionsDirective &S) {
2652   // Emit directive as a combined directive that consists of two implicit
2653   // directives: 'parallel' with 'sections' directive.
2654   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2655     CGF.EmitSections(S);
2656   };
2657   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2658                                  emitEmptyBoundParameters);
2659 }
2660 
2661 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2662                                                 const RegionCodeGenTy &BodyGen,
2663                                                 const TaskGenTy &TaskGen,
2664                                                 OMPTaskDataTy &Data) {
2665   // Emit outlined function for task construct.
2666   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2667   auto *I = CS->getCapturedDecl()->param_begin();
2668   auto *PartId = std::next(I);
2669   auto *TaskT = std::next(I, 4);
2670   // Check if the task is final
2671   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2672     // If the condition constant folds and can be elided, try to avoid emitting
2673     // the condition and the dead arm of the if/else.
2674     auto *Cond = Clause->getCondition();
2675     bool CondConstant;
2676     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2677       Data.Final.setInt(CondConstant);
2678     else
2679       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2680   } else {
2681     // By default the task is not final.
2682     Data.Final.setInt(/*IntVal=*/false);
2683   }
2684   // Check if the task has 'priority' clause.
2685   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2686     auto *Prio = Clause->getPriority();
2687     Data.Priority.setInt(/*IntVal=*/true);
2688     Data.Priority.setPointer(EmitScalarConversion(
2689         EmitScalarExpr(Prio), Prio->getType(),
2690         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2691         Prio->getExprLoc()));
2692   }
2693   // The first function argument for tasks is a thread id, the second one is a
2694   // part id (0 for tied tasks, >=0 for untied task).
2695   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2696   // Get list of private variables.
2697   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2698     auto IRef = C->varlist_begin();
2699     for (auto *IInit : C->private_copies()) {
2700       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2701       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2702         Data.PrivateVars.push_back(*IRef);
2703         Data.PrivateCopies.push_back(IInit);
2704       }
2705       ++IRef;
2706     }
2707   }
2708   EmittedAsPrivate.clear();
2709   // Get list of firstprivate variables.
2710   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2711     auto IRef = C->varlist_begin();
2712     auto IElemInitRef = C->inits().begin();
2713     for (auto *IInit : C->private_copies()) {
2714       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2715       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2716         Data.FirstprivateVars.push_back(*IRef);
2717         Data.FirstprivateCopies.push_back(IInit);
2718         Data.FirstprivateInits.push_back(*IElemInitRef);
2719       }
2720       ++IRef;
2721       ++IElemInitRef;
2722     }
2723   }
2724   // Get list of lastprivate variables (for taskloops).
2725   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2726   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2727     auto IRef = C->varlist_begin();
2728     auto ID = C->destination_exprs().begin();
2729     for (auto *IInit : C->private_copies()) {
2730       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2731       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2732         Data.LastprivateVars.push_back(*IRef);
2733         Data.LastprivateCopies.push_back(IInit);
2734       }
2735       LastprivateDstsOrigs.insert(
2736           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2737            cast<DeclRefExpr>(*IRef)});
2738       ++IRef;
2739       ++ID;
2740     }
2741   }
2742   SmallVector<const Expr *, 4> LHSs;
2743   SmallVector<const Expr *, 4> RHSs;
2744   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2745     auto IPriv = C->privates().begin();
2746     auto IRed = C->reduction_ops().begin();
2747     auto ILHS = C->lhs_exprs().begin();
2748     auto IRHS = C->rhs_exprs().begin();
2749     for (const auto *Ref : C->varlists()) {
2750       Data.ReductionVars.emplace_back(Ref);
2751       Data.ReductionCopies.emplace_back(*IPriv);
2752       Data.ReductionOps.emplace_back(*IRed);
2753       LHSs.emplace_back(*ILHS);
2754       RHSs.emplace_back(*IRHS);
2755       std::advance(IPriv, 1);
2756       std::advance(IRed, 1);
2757       std::advance(ILHS, 1);
2758       std::advance(IRHS, 1);
2759     }
2760   }
2761   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2762       *this, S.getLocStart(), LHSs, RHSs, Data);
2763   // Build list of dependences.
2764   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2765     for (auto *IRef : C->varlists())
2766       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2767   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2768       CodeGenFunction &CGF, PrePostActionTy &Action) {
2769     // Set proper addresses for generated private copies.
2770     OMPPrivateScope Scope(CGF);
2771     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2772         !Data.LastprivateVars.empty()) {
2773       enum { PrivatesParam = 2, CopyFnParam = 3 };
2774       auto *CopyFn = CGF.Builder.CreateLoad(
2775           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2776       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2777           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2778       // Map privates.
2779       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2780       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2781       CallArgs.push_back(PrivatesPtr);
2782       for (auto *E : Data.PrivateVars) {
2783         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2784         Address PrivatePtr = CGF.CreateMemTemp(
2785             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2786         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2787         CallArgs.push_back(PrivatePtr.getPointer());
2788       }
2789       for (auto *E : Data.FirstprivateVars) {
2790         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2791         Address PrivatePtr =
2792             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2793                               ".firstpriv.ptr.addr");
2794         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2795         CallArgs.push_back(PrivatePtr.getPointer());
2796       }
2797       for (auto *E : Data.LastprivateVars) {
2798         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2799         Address PrivatePtr =
2800             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2801                               ".lastpriv.ptr.addr");
2802         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2803         CallArgs.push_back(PrivatePtr.getPointer());
2804       }
2805       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2806                                                           CopyFn, CallArgs);
2807       for (auto &&Pair : LastprivateDstsOrigs) {
2808         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2809         DeclRefExpr DRE(
2810             const_cast<VarDecl *>(OrigVD),
2811             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2812                 OrigVD) != nullptr,
2813             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2814         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2815           return CGF.EmitLValue(&DRE).getAddress();
2816         });
2817       }
2818       for (auto &&Pair : PrivatePtrs) {
2819         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2820                             CGF.getContext().getDeclAlign(Pair.first));
2821         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2822       }
2823     }
2824     if (Data.Reductions) {
2825       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2826       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2827                              Data.ReductionOps);
2828       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2829           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2830       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2831         RedCG.emitSharedLValue(CGF, Cnt);
2832         RedCG.emitAggregateType(CGF, Cnt);
2833         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2834             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2835         Replacement =
2836             Address(CGF.EmitScalarConversion(
2837                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2838                         CGF.getContext().getPointerType(
2839                             Data.ReductionCopies[Cnt]->getType()),
2840                         SourceLocation()),
2841                     Replacement.getAlignment());
2842         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2843         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2844                          [Replacement]() { return Replacement; });
2845         // FIXME: This must removed once the runtime library is fixed.
2846         // Emit required threadprivate variables for
2847         // initilizer/combiner/finalizer.
2848         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2849                                                            RedCG, Cnt);
2850       }
2851     }
2852     // Privatize all private variables except for in_reduction items.
2853     (void)Scope.Privatize();
2854     SmallVector<const Expr *, 4> InRedVars;
2855     SmallVector<const Expr *, 4> InRedPrivs;
2856     SmallVector<const Expr *, 4> InRedOps;
2857     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2858     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2859       auto IPriv = C->privates().begin();
2860       auto IRed = C->reduction_ops().begin();
2861       auto ITD = C->taskgroup_descriptors().begin();
2862       for (const auto *Ref : C->varlists()) {
2863         InRedVars.emplace_back(Ref);
2864         InRedPrivs.emplace_back(*IPriv);
2865         InRedOps.emplace_back(*IRed);
2866         TaskgroupDescriptors.emplace_back(*ITD);
2867         std::advance(IPriv, 1);
2868         std::advance(IRed, 1);
2869         std::advance(ITD, 1);
2870       }
2871     }
2872     // Privatize in_reduction items here, because taskgroup descriptors must be
2873     // privatized earlier.
2874     OMPPrivateScope InRedScope(CGF);
2875     if (!InRedVars.empty()) {
2876       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2877       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2878         RedCG.emitSharedLValue(CGF, Cnt);
2879         RedCG.emitAggregateType(CGF, Cnt);
2880         // The taskgroup descriptor variable is always implicit firstprivate and
2881         // privatized already during procoessing of the firstprivates.
2882         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2883             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2884         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2885             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2886         Replacement = Address(
2887             CGF.EmitScalarConversion(
2888                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2889                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2890                 SourceLocation()),
2891             Replacement.getAlignment());
2892         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2893         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2894                               [Replacement]() { return Replacement; });
2895         // FIXME: This must removed once the runtime library is fixed.
2896         // Emit required threadprivate variables for
2897         // initilizer/combiner/finalizer.
2898         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2899                                                            RedCG, Cnt);
2900       }
2901     }
2902     (void)InRedScope.Privatize();
2903 
2904     Action.Enter(CGF);
2905     BodyGen(CGF);
2906   };
2907   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2908       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2909       Data.NumberOfParts);
2910   OMPLexicalScope Scope(*this, S);
2911   TaskGen(*this, OutlinedFn, Data);
2912 }
2913 
2914 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2915   // Emit outlined function for task construct.
2916   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2917   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2918   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2919   const Expr *IfCond = nullptr;
2920   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2921     if (C->getNameModifier() == OMPD_unknown ||
2922         C->getNameModifier() == OMPD_task) {
2923       IfCond = C->getCondition();
2924       break;
2925     }
2926   }
2927 
2928   OMPTaskDataTy Data;
2929   // Check if we should emit tied or untied task.
2930   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2931   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2932     CGF.EmitStmt(CS->getCapturedStmt());
2933   };
2934   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2935                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2936                             const OMPTaskDataTy &Data) {
2937     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2938                                             SharedsTy, CapturedStruct, IfCond,
2939                                             Data);
2940   };
2941   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2942 }
2943 
2944 void CodeGenFunction::EmitOMPTaskyieldDirective(
2945     const OMPTaskyieldDirective &S) {
2946   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2947 }
2948 
2949 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2950   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2951 }
2952 
2953 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2954   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2955 }
2956 
2957 void CodeGenFunction::EmitOMPTaskgroupDirective(
2958     const OMPTaskgroupDirective &S) {
2959   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2960     Action.Enter(CGF);
2961     if (const Expr *E = S.getReductionRef()) {
2962       SmallVector<const Expr *, 4> LHSs;
2963       SmallVector<const Expr *, 4> RHSs;
2964       OMPTaskDataTy Data;
2965       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2966         auto IPriv = C->privates().begin();
2967         auto IRed = C->reduction_ops().begin();
2968         auto ILHS = C->lhs_exprs().begin();
2969         auto IRHS = C->rhs_exprs().begin();
2970         for (const auto *Ref : C->varlists()) {
2971           Data.ReductionVars.emplace_back(Ref);
2972           Data.ReductionCopies.emplace_back(*IPriv);
2973           Data.ReductionOps.emplace_back(*IRed);
2974           LHSs.emplace_back(*ILHS);
2975           RHSs.emplace_back(*IRHS);
2976           std::advance(IPriv, 1);
2977           std::advance(IRed, 1);
2978           std::advance(ILHS, 1);
2979           std::advance(IRHS, 1);
2980         }
2981       }
2982       llvm::Value *ReductionDesc =
2983           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
2984                                                            LHSs, RHSs, Data);
2985       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2986       CGF.EmitVarDecl(*VD);
2987       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
2988                             /*Volatile=*/false, E->getType());
2989     }
2990     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2991   };
2992   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2993   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2994 }
2995 
2996 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2997   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2998     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2999       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3000                                 FlushClause->varlist_end());
3001     }
3002     return llvm::None;
3003   }(), S.getLocStart());
3004 }
3005 
/// Emits the OpenMP 'distribute' loop, either standalone or as the distribute
/// part of a combined directive (detected via
/// isOpenMPLoopBoundSharingDirective).
/// \param CodeGenLoop Callback that emits the inner loop body.
/// \param IncExpr Increment expression used by the inner loop emitted here.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      // Precondition is not a constant: emit an explicit branch around the
      // loop, keeping profile counts consistent.
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      // For bound-sharing (combined) directives the distribute loop uses the
      // combined lower/upper bound variables; otherwise its own ones.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // Convert the chunk expression to the iteration variable's type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // Static non-chunked schedule: one runtime init call hands out the
        // whole chunk, then a simple inner loop processes it.
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress());
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // for distribute alone,  codegen
        // while (idx <= UB) { BODY; ++idx; }
        // when combined with 'for' (e.g. as in 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(
                EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
3161 
3162 void CodeGenFunction::EmitOMPDistributeDirective(
3163     const OMPDistributeDirective &S) {
3164   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3165 
3166     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3167   };
3168   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3169   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3170                                               false);
3171 }
3172 
3173 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3174                                                    const CapturedStmt *S) {
3175   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3176   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3177   CGF.CapturedStmtInfo = &CapStmtInfo;
3178   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3179   Fn->addFnAttr(llvm::Attribute::NoInline);
3180   return Fn;
3181 }
3182 
3183 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3184   if (!S.getAssociatedStmt()) {
3185     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3186       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3187     return;
3188   }
3189   auto *C = S.getSingleClause<OMPSIMDClause>();
3190   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3191                                  PrePostActionTy &Action) {
3192     if (C) {
3193       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3194       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3195       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3196       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3197       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3198                                                       OutlinedFn, CapturedVars);
3199     } else {
3200       Action.Enter(CGF);
3201       CGF.EmitStmt(
3202           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3203     }
3204   };
3205   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3206   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3207 }
3208 
3209 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3210                                          QualType SrcType, QualType DestType,
3211                                          SourceLocation Loc) {
3212   assert(CGF.hasScalarEvaluationKind(DestType) &&
3213          "DestType must have scalar evaluation kind.");
3214   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3215   return Val.isScalar()
3216              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3217                                         Loc)
3218              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3219                                                  DestType, Loc);
3220 }
3221 
3222 static CodeGenFunction::ComplexPairTy
3223 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3224                       QualType DestType, SourceLocation Loc) {
3225   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3226          "DestType must have complex evaluation kind.");
3227   CodeGenFunction::ComplexPairTy ComplexVal;
3228   if (Val.isScalar()) {
3229     // Convert the input element to the element type of the complex.
3230     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3231     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3232                                               DestElementType, Loc);
3233     ComplexVal = CodeGenFunction::ComplexPairTy(
3234         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3235   } else {
3236     assert(Val.isComplex() && "Must be a scalar or complex.");
3237     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3238     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3239     ComplexVal.first = CGF.EmitScalarConversion(
3240         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3241     ComplexVal.second = CGF.EmitScalarConversion(
3242         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3243   }
3244   return ComplexVal;
3245 }
3246 
3247 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3248                                   LValue LVal, RValue RVal) {
3249   if (LVal.isGlobalReg()) {
3250     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3251   } else {
3252     CGF.EmitAtomicStore(RVal, LVal,
3253                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3254                                  : llvm::AtomicOrdering::Monotonic,
3255                         LVal.isVolatile(), /*IsInit=*/false);
3256   }
3257 }
3258 
3259 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3260                                          QualType RValTy, SourceLocation Loc) {
3261   switch (getEvaluationKind(LVal.getType())) {
3262   case TEK_Scalar:
3263     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3264                                *this, RVal, RValTy, LVal.getType(), Loc)),
3265                            LVal);
3266     break;
3267   case TEK_Complex:
3268     EmitStoreOfComplex(
3269         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3270         /*isInit=*/false);
3271     break;
3272   case TEK_Aggregate:
3273     llvm_unreachable("Must be a scalar or complex.");
3274   }
3275 }
3276 
3277 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3278                                   const Expr *X, const Expr *V,
3279                                   SourceLocation Loc) {
3280   // v = x;
3281   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3282   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3283   LValue XLValue = CGF.EmitLValue(X);
3284   LValue VLValue = CGF.EmitLValue(V);
3285   RValue Res = XLValue.isGlobalReg()
3286                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3287                    : CGF.EmitAtomicLoad(
3288                          XLValue, Loc,
3289                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3290                                   : llvm::AtomicOrdering::Monotonic,
3291                          XLValue.isVolatile());
3292   // OpenMP, 2.12.6, atomic Construct
3293   // Any atomic construct with a seq_cst clause forces the atomically
3294   // performed operation to include an implicit flush operation without a
3295   // list.
3296   if (IsSeqCst)
3297     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3298   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3299 }
3300 
3301 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3302                                    const Expr *X, const Expr *E,
3303                                    SourceLocation Loc) {
3304   // x = expr;
3305   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3306   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3307   // OpenMP, 2.12.6, atomic Construct
3308   // Any atomic construct with a seq_cst clause forces the atomically
3309   // performed operation to include an implicit flush operation without a
3310   // list.
3311   if (IsSeqCst)
3312     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3313 }
3314 
/// Tries to lower an OpenMP atomic update to a single LLVM 'atomicrmw'
/// instruction.
/// \param X Lvalue of the atomically updated expression.
/// \param Update Rvalue of the update operand.
/// \param BO Binary operation combining 'x' and the update value.
/// \param AO Atomic ordering for the emitted instruction.
/// \param IsXLHSInRHSPart true if 'x' appears as the LHS inside the RHS of the
/// update ('x = x op expr' rather than 'x = expr op x').
/// \returns {true, result} if an atomicrmw was emitted; {false, null} if the
/// caller must fall back to a compare-and-swap loop.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  // Bail out (caller falls back to CAS) when: the operation is a comma; the
  // update is not an integer scalar; 'x' is not a simple lvalue; the update
  // type differs from the stored type (unless it is a constant we can cast
  // below); or the target has no builtin atomic for this size/alignment.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Map the source-level binary operator to the matching atomicrmw operation.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // 'expr - x' is not expressible as an atomicrmw sub; only 'x - expr' is.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    // Conditional-assignment form: whether '<' lowers to min or max depends on
    // which side 'x' appears on, and signedness selects the (U)Min/(U)Max
    // variants.
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    // Plain 'x = expr' is an atomic exchange.
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    // No atomicrmw equivalent; caller falls back to a CAS loop.
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    // These operator kinds never reach here as atomic updates.
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  // A constant update operand may need to be cast to the stored integer type
  // (this is the case the type-mismatch check above deliberately allowed).
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
3406 
3407 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3408     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3409     llvm::AtomicOrdering AO, SourceLocation Loc,
3410     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3411   // Update expressions are allowed to have the following forms:
3412   // x binop= expr; -> xrval + expr;
3413   // x++, ++x -> xrval + 1;
3414   // x--, --x -> xrval - 1;
3415   // x = x binop expr; -> xrval binop expr
3416   // x = expr Op x; - > expr binop xrval;
3417   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3418   if (!Res.first) {
3419     if (X.isGlobalReg()) {
3420       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3421       // 'xrval'.
3422       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3423     } else {
3424       // Perform compare-and-swap procedure.
3425       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3426     }
3427   }
3428   return Res;
3429 }
3430 
3431 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3432                                     const Expr *X, const Expr *E,
3433                                     const Expr *UE, bool IsXLHSInRHSPart,
3434                                     SourceLocation Loc) {
3435   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3436          "Update expr in 'atomic update' must be a binary operator.");
3437   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3438   // Update expressions are allowed to have the following forms:
3439   // x binop= expr; -> xrval + expr;
3440   // x++, ++x -> xrval + 1;
3441   // x--, --x -> xrval - 1;
3442   // x = x binop expr; -> xrval binop expr
3443   // x = expr Op x; - > expr binop xrval;
3444   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3445   LValue XLValue = CGF.EmitLValue(X);
3446   RValue ExprRValue = CGF.EmitAnyExpr(E);
3447   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3448                      : llvm::AtomicOrdering::Monotonic;
3449   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3450   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3451   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3452   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3453   auto Gen =
3454       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3455         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3456         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3457         return CGF.EmitAnyExpr(UE);
3458       };
3459   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3460       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3461   // OpenMP, 2.12.6, atomic Construct
3462   // Any atomic construct with a seq_cst clause forces the atomically
3463   // performed operation to include an implicit flush operation without a
3464   // list.
3465   if (IsSeqCst)
3466     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3467 }
3468 
3469 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3470                             QualType SourceType, QualType ResType,
3471                             SourceLocation Loc) {
3472   switch (CGF.getEvaluationKind(ResType)) {
3473   case TEK_Scalar:
3474     return RValue::get(
3475         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3476   case TEK_Complex: {
3477     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3478     return RValue::getComplex(Res.first, Res.second);
3479   }
3480   case TEK_Aggregate:
3481     break;
3482   }
3483   llvm_unreachable("Must be a scalar or complex.");
3484 }
3485 
/// Emit '#pragma omp atomic capture': atomically update (or overwrite) 'x'
/// and store its old or new value into 'v'.
/// \param IsPostfixUpdate true when 'v' receives the value 'x' had before
/// the update (x++ semantics), false when it receives the updated value.
/// \param UE update expression with opaque placeholders for 'x' and 'expr',
/// or null when 'x' is simply overwritten with 'E'.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value eventually stored into 'v'; filled in by one of the paths below.
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    // Which opaque operand stands for 'x' depends on the update form.
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Generator re-emits UE with 'x'/'expr' bound to computed values and, as
    // a side effect, records the value that must be stored into 'v'.
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // Old value of 'x' is captured for 'v'; 'x' receives 'expr'.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3566 
/// Dispatch emission of '#pragma omp atomic' based on the clause kind found
/// on the directive (read/write/update/capture). OMPC_unknown means no
/// kind-selecting clause was given, which defaults to 'update' semantics.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  // Every other clause kind is rejected by Sema before CodeGen runs; the
  // exhaustive list (no 'default') keeps -Wswitch useful when new clauses
  // are added.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
3636 
3637 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3638   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3639   OpenMPClauseKind Kind = OMPC_unknown;
3640   for (auto *C : S.clauses()) {
3641     // Find first clause (skip seq_cst clause, if it is first).
3642     if (C->getClauseKind() != OMPC_seq_cst) {
3643       Kind = C->getClauseKind();
3644       break;
3645     }
3646   }
3647 
3648   const auto *CS =
3649       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3650   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3651     enterFullExpression(EWC);
3652   }
3653   // Processing for statements under 'atomic capture'.
3654   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3655     for (const auto *C : Compound->body()) {
3656       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3657         enterFullExpression(EWC);
3658       }
3659     }
3660   }
3661 
3662   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3663                                             PrePostActionTy &) {
3664     CGF.EmitStopPoint(CS);
3665     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3666                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3667                       S.isXLHSInRHSPart(), S.getLocStart());
3668   };
3669   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3670   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3671 }
3672 
/// Shared emission path for target execution directives: outline the region
/// into a device function and emit the host-side offload call.
/// NOTE(review): the outlined function must be emitted before the lexical
/// scope / captured-vars setup below — keep this ordering.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  // Filled in by emitTargetOutlinedFunction below.
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  // Emit the host-side call that launches (or falls back from) the region,
  // passing the captured variables through to the runtime.
  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}
3732 
3733 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3734                              PrePostActionTy &Action) {
3735   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3736   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3737   CGF.EmitOMPPrivateClause(S, PrivateScope);
3738   (void)PrivateScope.Privatize();
3739 
3740   Action.Enter(CGF);
3741   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3742 }
3743 
3744 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3745                                                   StringRef ParentName,
3746                                                   const OMPTargetDirective &S) {
3747   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3748     emitTargetRegion(CGF, S, Action);
3749   };
3750   llvm::Function *Fn;
3751   llvm::Constant *Addr;
3752   // Emit target region as a standalone region.
3753   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3754       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3755   assert(Fn && Addr && "Target device function emission failed.");
3756 }
3757 
3758 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3759   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3760     emitTargetRegion(CGF, S, Action);
3761   };
3762   emitCommonOMPTargetDirective(*this, S, CodeGen);
3763 }
3764 
3765 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3766                                         const OMPExecutableDirective &S,
3767                                         OpenMPDirectiveKind InnermostKind,
3768                                         const RegionCodeGenTy &CodeGen) {
3769   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3770   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3771       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3772 
3773   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3774   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3775   if (NT || TL) {
3776     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3777     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3778 
3779     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3780                                                   S.getLocStart());
3781   }
3782 
3783   OMPTeamsScope Scope(CGF, S);
3784   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3785   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3786   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3787                                            CapturedVars);
3788 }
3789 
3790 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3791   // Emit teams region as a standalone region.
3792   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3793     OMPPrivateScope PrivateScope(CGF);
3794     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3795     CGF.EmitOMPPrivateClause(S, PrivateScope);
3796     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3797     (void)PrivateScope.Privatize();
3798     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3799     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3800   };
3801   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3802   emitPostUpdateForReductionClause(
3803       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3804 }
3805 
3806 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3807                                   const OMPTargetTeamsDirective &S) {
3808   auto *CS = S.getCapturedStmt(OMPD_teams);
3809   Action.Enter(CGF);
3810   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3811     // TODO: Add support for clauses.
3812     CGF.EmitStmt(CS->getCapturedStmt());
3813   };
3814   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3815 }
3816 
3817 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3818     CodeGenModule &CGM, StringRef ParentName,
3819     const OMPTargetTeamsDirective &S) {
3820   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3821     emitTargetTeamsRegion(CGF, Action, S);
3822   };
3823   llvm::Function *Fn;
3824   llvm::Constant *Addr;
3825   // Emit target region as a standalone region.
3826   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3827       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3828   assert(Fn && Addr && "Target device function emission failed.");
3829 }
3830 
3831 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3832     const OMPTargetTeamsDirective &S) {
3833   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3834     emitTargetTeamsRegion(CGF, Action, S);
3835   };
3836   emitCommonOMPTargetDirective(*this, S, CodeGen);
3837 }
3838 
3839 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
3840     const OMPTeamsDistributeDirective &S) {
3841 
3842   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3843     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3844   };
3845 
3846   // Emit teams region as a standalone region.
3847   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3848                                             PrePostActionTy &) {
3849     OMPPrivateScope PrivateScope(CGF);
3850     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3851     (void)PrivateScope.Privatize();
3852     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3853                                                     CodeGenDistribute);
3854     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3855   };
3856   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3857   emitPostUpdateForReductionClause(*this, S,
3858                                    [](CodeGenFunction &) { return nullptr; });
3859 }
3860 
3861 void CodeGenFunction::EmitOMPCancellationPointDirective(
3862     const OMPCancellationPointDirective &S) {
3863   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3864                                                    S.getCancelRegion());
3865 }
3866 
3867 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3868   const Expr *IfCond = nullptr;
3869   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3870     if (C->getNameModifier() == OMPD_unknown ||
3871         C->getNameModifier() == OMPD_cancel) {
3872       IfCond = C->getCondition();
3873       break;
3874     }
3875   }
3876   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3877                                         S.getCancelRegion());
3878 }
3879 
3880 CodeGenFunction::JumpDest
3881 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3882   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3883       Kind == OMPD_target_parallel)
3884     return ReturnBlock;
3885   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3886          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3887          Kind == OMPD_distribute_parallel_for ||
3888          Kind == OMPD_target_parallel_for);
3889   return OMPCancelStack.getExitBlock();
3890 }
3891 
/// Privatize the list items of a 'use_device_ptr' clause: each original
/// variable is replaced by a private copy initialized from the device
/// address the runtime reported in \p CaptureDeviceAddrMap.
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  // The three lists (originals, init variables, private copies) are iterated
  // in lock-step.
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it is initialized from the
      // initialization variable we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable reached its purpose in the emission of
      // the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
3953 
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Innermost generator: just emit the statement captured by the directive.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
4043 
4044 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4045     const OMPTargetEnterDataDirective &S) {
4046   // If we don't have target devices, don't bother emitting the data mapping
4047   // code.
4048   if (CGM.getLangOpts().OMPTargetTriples.empty())
4049     return;
4050 
4051   // Check if we have any if clause associated with the directive.
4052   const Expr *IfCond = nullptr;
4053   if (auto *C = S.getSingleClause<OMPIfClause>())
4054     IfCond = C->getCondition();
4055 
4056   // Check if we have any device clause associated with the directive.
4057   const Expr *Device = nullptr;
4058   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4059     Device = C->getDevice();
4060 
4061   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4062 }
4063 
4064 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4065     const OMPTargetExitDataDirective &S) {
4066   // If we don't have target devices, don't bother emitting the data mapping
4067   // code.
4068   if (CGM.getLangOpts().OMPTargetTriples.empty())
4069     return;
4070 
4071   // Check if we have any if clause associated with the directive.
4072   const Expr *IfCond = nullptr;
4073   if (auto *C = S.getSingleClause<OMPIfClause>())
4074     IfCond = C->getCondition();
4075 
4076   // Check if we have any device clause associated with the directive.
4077   const Expr *Device = nullptr;
4078   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4079     Device = C->getDevice();
4080 
4081   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4082 }
4083 
/// Emit the body of a '#pragma omp target parallel' region: privatize the
/// clause-listed variables, emit the captured statement, and finalize any
/// reductions, all routed through the common 'parallel' emission path.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  // Run any caller-registered pre-actions before generating the region.
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    // Set up firstprivate/private/reduction copies; Privatize() must run
    // before the body so references resolve to the private copies.
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    // Combine the per-thread reduction copies after the body.
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  // No extra post-update expression is needed for this construct's reductions.
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
4105 
4106 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4107     CodeGenModule &CGM, StringRef ParentName,
4108     const OMPTargetParallelDirective &S) {
4109   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4110     emitTargetParallelRegion(CGF, S, Action);
4111   };
4112   llvm::Function *Fn;
4113   llvm::Constant *Addr;
4114   // Emit target region as a standalone region.
4115   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4116       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4117   assert(Fn && Addr && "Target device function emission failed.");
4118 }
4119 
4120 void CodeGenFunction::EmitOMPTargetParallelDirective(
4121     const OMPTargetParallelDirective &S) {
4122   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4123     emitTargetParallelRegion(CGF, S, Action);
4124   };
4125   emitCommonOMPTargetDirective(*this, S, CodeGen);
4126 }
4127 
4128 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4129     const OMPTargetParallelForDirective &S) {
4130   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
4131   CGM.getOpenMPRuntime().emitInlinedDirective(
4132       *this, OMPD_target_parallel_for,
4133       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4134         OMPLoopScope PreInitScope(CGF, S);
4135         CGF.EmitStmt(
4136             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
4137       });
4138 }
4139 
4140 /// Emit a helper variable and return corresponding lvalue.
4141 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4142                      const ImplicitParamDecl *PVD,
4143                      CodeGenFunction::OMPPrivateScope &Privates) {
4144   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4145   Privates.addPrivate(
4146       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4147 }
4148 
/// Emit code for 'taskloop'-based directives ('taskloop' and 'taskloop simd'):
/// outline the loop body as a task, configure scheduling from grainsize /
/// num_tasks, and emit the runtime taskloop call, optionally wrapped in an
/// implicit taskgroup.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Pick the 'if' clause that applies to the taskloop (unmodified or
  // 'taskloop'-modified); the first match wins.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop. Schedule is a (pointer, bool) pair: the bool
  // distinguishes num_tasks (true) from grainsize (false).
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Generator for the loop body that runs inside each generated task.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // For 'taskloop simd' also emit the simd-specific loop metadata setup.
    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // Positions of the loop-bound parameters within the outlined task
    // function's captured-decl parameter list; the offset 5 skips the
    // leading implicit parameters. NOTE(review): must stay in sync with how
    // Sema builds the taskloop captured declaration — confirm if that changes.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    // Redirect the helper loop variables to the corresponding outlined
    // function parameters.
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the actual loop over the chunk assigned to this task.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Generator for the runtime call that spawns the taskloop, wrapped as an
  // inlined 'taskloop' directive region.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  // Without 'nogroup', the taskloop is implicitly enclosed in a taskgroup
  // region per the OpenMP spec.
  if (Data.Nogroup)
    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
  else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
        },
        S.getLocStart());
  }
}
4287 
/// Emit code for '#pragma omp taskloop'; shares the full lowering with the
/// other taskloop-based directives.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4291 
/// Emit code for '#pragma omp taskloop simd'; the shared lowering detects the
/// simd variant via the directive kind and adds simd loop handling.
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4296 
4297 // Generate the instructions for '#pragma omp target update' directive.
4298 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4299     const OMPTargetUpdateDirective &S) {
4300   // If we don't have target devices, don't bother emitting the data mapping
4301   // code.
4302   if (CGM.getLangOpts().OMPTargetTriples.empty())
4303     return;
4304 
4305   // Check if we have any if clause associated with the directive.
4306   const Expr *IfCond = nullptr;
4307   if (auto *C = S.getSingleClause<OMPIfClause>())
4308     IfCond = C->getCondition();
4309 
4310   // Check if we have any device clause associated with the directive.
4311   const Expr *Device = nullptr;
4312   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4313     Device = C->getDevice();
4314 
4315   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4316 }
4317