1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             assert(VD == VD->getCanonicalDecl() &&
69                         "Canonical decl must be captured.");
70             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
71                             isCapturedVar(CGF, VD) ||
72                                 (CGF.CapturedStmtInfo &&
73                                  InlinedShareds.isGlobalVarCaptured(VD)),
74                             VD->getType().getNonReferenceType(), VK_LValue,
75                             SourceLocation());
76             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
77               return CGF.EmitLValue(&DRE).getAddress();
78             });
79           }
80         }
81         (void)InlinedShareds.Privatize();
82       }
83     }
84   }
85 };
86 
87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
88 /// for captured expressions.
89 class OMPParallelScope final : public OMPLexicalScope {
90   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
91     OpenMPDirectiveKind Kind = S.getDirectiveKind();
92     return !(isOpenMPTargetExecutionDirective(Kind) ||
93              isOpenMPLoopBoundSharingDirective(Kind)) &&
94            isOpenMPParallelDirective(Kind);
95   }
96 
97 public:
98   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
99       : OMPLexicalScope(CGF, S,
100                         /*AsInlined=*/false,
101                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S,
116                         /*AsInlined=*/false,
117                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 };
119 
120 /// Private scope for OpenMP loop-based directives, that supports capturing
121 /// of used expression from loop statement.
122 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
123   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
124     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
125       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
126         for (const auto *I : PreInits->decls())
127           CGF.EmitVarDecl(cast<VarDecl>(*I));
128       }
129     }
130   }
131 
132 public:
133   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
134       : CodeGenFunction::RunCleanupsScope(CGF) {
135     emitPreInitStmt(CGF, S);
136   }
137 };
138 
139 } // namespace
140 
141 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
142   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
143     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
144       OrigVD = OrigVD->getCanonicalDecl();
145       bool IsCaptured =
146           LambdaCaptureFields.lookup(OrigVD) ||
147           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
148           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
149       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
150                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
151       return EmitLValue(&DRE);
152     }
153   }
154   return EmitLValue(E);
155 }
156 
157 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
158   auto &C = getContext();
159   llvm::Value *Size = nullptr;
160   auto SizeInChars = C.getTypeSizeInChars(Ty);
161   if (SizeInChars.isZero()) {
162     // getTypeSizeInChars() returns 0 for a VLA.
163     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
164       llvm::Value *ArraySize;
165       std::tie(ArraySize, Ty) = getVLASize(VAT);
166       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
167     }
168     SizeInChars = C.getTypeSizeInChars(Ty);
169     if (SizeInChars.isZero())
170       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
171     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
172   } else
173     Size = CGM.getSize(SizeInChars);
174   return Size;
175 }
176 
177 void CodeGenFunction::GenerateOpenMPCapturedVars(
178     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
179   const RecordDecl *RD = S.getCapturedRecordDecl();
180   auto CurField = RD->field_begin();
181   auto CurCap = S.captures().begin();
182   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
183                                                  E = S.capture_init_end();
184        I != E; ++I, ++CurField, ++CurCap) {
185     if (CurField->hasCapturedVLAType()) {
186       auto VAT = CurField->getCapturedVLAType();
187       auto *Val = VLASizeMap[VAT->getSizeExpr()];
188       CapturedVars.push_back(Val);
189     } else if (CurCap->capturesThis())
190       CapturedVars.push_back(CXXThisValue);
191     else if (CurCap->capturesVariableByCopy()) {
192       llvm::Value *CV =
193           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
194 
195       // If the field is not a pointer, we need to save the actual value
196       // and load it as a void pointer.
197       if (!CurField->getType()->isAnyPointerType()) {
198         auto &Ctx = getContext();
199         auto DstAddr = CreateMemTemp(
200             Ctx.getUIntPtrType(),
201             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
202         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
203 
204         auto *SrcAddrVal = EmitScalarConversion(
205             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
206             Ctx.getPointerType(CurField->getType()), SourceLocation());
207         LValue SrcLV =
208             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
209 
210         // Store the value using the source type pointer.
211         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
212 
213         // Load the value using the destination type pointer.
214         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
215       }
216       CapturedVars.push_back(CV);
217     } else {
218       assert(CurCap->capturesVariable() && "Expected capture by reference.");
219       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
220     }
221   }
222 }
223 
224 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
225                                     StringRef Name, LValue AddrLV,
226                                     bool isReferenceType = false) {
227   ASTContext &Ctx = CGF.getContext();
228 
229   auto *CastedPtr = CGF.EmitScalarConversion(
230       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
231       Ctx.getPointerType(DstType), SourceLocation());
232   auto TmpAddr =
233       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
234           .getAddress();
235 
236   // If we are dealing with references we need to return the address of the
237   // reference instead of the reference of the value.
238   if (isReferenceType) {
239     QualType RefType = Ctx.getLValueReferenceType(DstType);
240     auto *RefVal = TmpAddr.getPointer();
241     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
242     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
243     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
244   }
245 
246   return TmpAddr;
247 }
248 
249 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
250   if (T->isLValueReferenceType()) {
251     return C.getLValueReferenceType(
252         getCanonicalParamType(C, T.getNonReferenceType()),
253         /*SpelledAsLValue=*/false);
254   }
255   if (T->isPointerType())
256     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
257   if (auto *A = T->getAsArrayTypeUnsafe()) {
258     if (auto *VLA = dyn_cast<VariableArrayType>(A))
259       return getCanonicalParamType(C, VLA->getElementType());
260     else if (!A->isVariablyModifiedType())
261       return C.getCanonicalType(T);
262   }
263   return C.getCanonicalParamType(T);
264 }
265 
266 namespace {
267   /// Contains required data for proper outlined function codegen.
268   struct FunctionOptions {
269     /// Captured statement for which the function is generated.
270     const CapturedStmt *S = nullptr;
271     /// true if cast to/from  UIntPtr is required for variables captured by
272     /// value.
273     const bool UIntPtrCastRequired = true;
274     /// true if only casted arguments must be registered as local args or VLA
275     /// sizes.
276     const bool RegisterCastedArgsOnly = false;
277     /// Name of the generated function.
278     const StringRef FunctionName;
279     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
280                              bool RegisterCastedArgsOnly,
281                              StringRef FunctionName)
282         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
283           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
284           FunctionName(FunctionName) {}
285   };
286 }
287 
288 static llvm::Function *emitOutlinedFunctionPrologue(
289     CodeGenFunction &CGF, FunctionArgList &Args,
290     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
291         &LocalAddrs,
292     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
293         &VLASizes,
294     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
295   const CapturedDecl *CD = FO.S->getCapturedDecl();
296   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
297   assert(CD->hasBody() && "missing CapturedDecl body");
298 
299   CXXThisValue = nullptr;
300   // Build the argument list.
301   CodeGenModule &CGM = CGF.CGM;
302   ASTContext &Ctx = CGM.getContext();
303   FunctionArgList TargetArgs;
304   Args.append(CD->param_begin(),
305               std::next(CD->param_begin(), CD->getContextParamPosition()));
306   TargetArgs.append(
307       CD->param_begin(),
308       std::next(CD->param_begin(), CD->getContextParamPosition()));
309   auto I = FO.S->captures().begin();
310   for (auto *FD : RD->fields()) {
311     QualType ArgType = FD->getType();
312     IdentifierInfo *II = nullptr;
313     VarDecl *CapVar = nullptr;
314 
315     // If this is a capture by copy and the type is not a pointer, the outlined
316     // function argument type should be uintptr and the value properly casted to
317     // uintptr. This is necessary given that the runtime library is only able to
318     // deal with pointers. We can pass in the same way the VLA type sizes to the
319     // outlined function.
320     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
321         I->capturesVariableArrayType()) {
322       if (FO.UIntPtrCastRequired)
323         ArgType = Ctx.getUIntPtrType();
324     }
325 
326     if (I->capturesVariable() || I->capturesVariableByCopy()) {
327       CapVar = I->getCapturedVar();
328       II = CapVar->getIdentifier();
329     } else if (I->capturesThis())
330       II = &Ctx.Idents.get("this");
331     else {
332       assert(I->capturesVariableArrayType());
333       II = &Ctx.Idents.get("vla");
334     }
335     if (ArgType->isVariablyModifiedType())
336       ArgType = getCanonicalParamType(Ctx, ArgType);
337     auto *Arg =
338         ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
339                                   ArgType, ImplicitParamDecl::Other);
340     Args.emplace_back(Arg);
341     // Do not cast arguments if we emit function with non-original types.
342     TargetArgs.emplace_back(
343         FO.UIntPtrCastRequired
344             ? Arg
345             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
346     ++I;
347   }
348   Args.append(
349       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
350       CD->param_end());
351   TargetArgs.append(
352       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
353       CD->param_end());
354 
355   // Create the function declaration.
356   FunctionType::ExtInfo ExtInfo;
357   const CGFunctionInfo &FuncInfo =
358       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
359   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
360 
361   llvm::Function *F =
362       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
363                              FO.FunctionName, &CGM.getModule());
364   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
365   if (CD->isNothrow())
366     F->setDoesNotThrow();
367 
368   // Generate the function.
369   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
370                     FO.S->getLocStart(), CD->getBody()->getLocStart());
371   unsigned Cnt = CD->getContextParamPosition();
372   I = FO.S->captures().begin();
373   for (auto *FD : RD->fields()) {
374     // Do not map arguments if we emit function with non-original types.
375     Address LocalAddr(Address::invalid());
376     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
377       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
378                                                              TargetArgs[Cnt]);
379     } else {
380       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
381     }
382     // If we are capturing a pointer by copy we don't need to do anything, just
383     // use the value that we get from the arguments.
384     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
385       const VarDecl *CurVD = I->getCapturedVar();
386       // If the variable is a reference we need to materialize it here.
387       if (CurVD->getType()->isReferenceType()) {
388         Address RefAddr = CGF.CreateMemTemp(
389             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
390         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
391                               /*Volatile=*/false, CurVD->getType());
392         LocalAddr = RefAddr;
393       }
394       if (!FO.RegisterCastedArgsOnly)
395         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
396       ++Cnt;
397       ++I;
398       continue;
399     }
400 
401     LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
402                                         AlignmentSource::Decl);
403     if (FD->hasCapturedVLAType()) {
404       if (FO.UIntPtrCastRequired) {
405         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
406                                                           Args[Cnt]->getName(),
407                                                           ArgLVal),
408                                      FD->getType(), AlignmentSource::Decl);
409       }
410       auto *ExprArg =
411           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
412       auto VAT = FD->getCapturedVLAType();
413       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
414     } else if (I->capturesVariable()) {
415       auto *Var = I->getCapturedVar();
416       QualType VarTy = Var->getType();
417       Address ArgAddr = ArgLVal.getAddress();
418       if (!VarTy->isReferenceType()) {
419         if (ArgLVal.getType()->isLValueReferenceType()) {
420           ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
421         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
422           assert(ArgLVal.getType()->isPointerType());
423           ArgAddr = CGF.EmitLoadOfPointer(
424               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
425         }
426       }
427       if (!FO.RegisterCastedArgsOnly) {
428         LocalAddrs.insert(
429             {Args[Cnt],
430              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
431       }
432     } else if (I->capturesVariableByCopy()) {
433       assert(!FD->getType()->isAnyPointerType() &&
434              "Not expecting a captured pointer.");
435       auto *Var = I->getCapturedVar();
436       QualType VarTy = Var->getType();
437       LocalAddrs.insert(
438           {Args[Cnt],
439            {Var,
440             FO.UIntPtrCastRequired
441                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
442                                        ArgLVal, VarTy->isReferenceType())
443                 : ArgLVal.getAddress()}});
444     } else {
445       // If 'this' is captured, load it into CXXThisValue.
446       assert(I->capturesThis());
447       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
448                          .getScalarVal();
449       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
450     }
451     ++Cnt;
452     ++I;
453   }
454 
455   return F;
456 }
457 
458 llvm::Function *
459 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
460   assert(
461       CapturedStmtInfo &&
462       "CapturedStmtInfo should be set when generating the captured function");
463   const CapturedDecl *CD = S.getCapturedDecl();
464   // Build the argument list.
465   bool NeedWrapperFunction =
466       getDebugInfo() &&
467       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
468   FunctionArgList Args;
469   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
470   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
471   SmallString<256> Buffer;
472   llvm::raw_svector_ostream Out(Buffer);
473   Out << CapturedStmtInfo->getHelperName();
474   if (NeedWrapperFunction)
475     Out << "_debug__";
476   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
477                      Out.str());
478   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
479                                                    VLASizes, CXXThisValue, FO);
480   for (const auto &LocalAddrPair : LocalAddrs) {
481     if (LocalAddrPair.second.first) {
482       setAddrOfLocalVar(LocalAddrPair.second.first,
483                         LocalAddrPair.second.second);
484     }
485   }
486   for (const auto &VLASizePair : VLASizes)
487     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
488   PGO.assignRegionCounters(GlobalDecl(CD), F);
489   CapturedStmtInfo->EmitBody(*this, CD->getBody());
490   FinishFunction(CD->getBodyRBrace());
491   if (!NeedWrapperFunction)
492     return F;
493 
494   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
495                             /*RegisterCastedArgsOnly=*/true,
496                             CapturedStmtInfo->getHelperName());
497   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
498   Args.clear();
499   LocalAddrs.clear();
500   VLASizes.clear();
501   llvm::Function *WrapperF =
502       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
503                                    WrapperCGF.CXXThisValue, WrapperFO);
504   llvm::SmallVector<llvm::Value *, 4> CallArgs;
505   for (const auto *Arg : Args) {
506     llvm::Value *CallArg;
507     auto I = LocalAddrs.find(Arg);
508     if (I != LocalAddrs.end()) {
509       LValue LV = WrapperCGF.MakeAddrLValue(
510           I->second.second,
511           I->second.first ? I->second.first->getType() : Arg->getType(),
512           AlignmentSource::Decl);
513       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
514     } else {
515       auto EI = VLASizes.find(Arg);
516       if (EI != VLASizes.end())
517         CallArg = EI->second.second;
518       else {
519         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
520                                               Arg->getType(),
521                                               AlignmentSource::Decl);
522         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
523       }
524     }
525     CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
526   }
527   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
528                                                   F, CallArgs);
529   WrapperCGF.FinishFunction();
530   return WrapperF;
531 }
532 
533 //===----------------------------------------------------------------------===//
534 //                              OpenMP Directive Emission
535 //===----------------------------------------------------------------------===//
536 void CodeGenFunction::EmitOMPAggregateAssign(
537     Address DestAddr, Address SrcAddr, QualType OriginalType,
538     const llvm::function_ref<void(Address, Address)> &CopyGen) {
539   // Perform element-by-element initialization.
540   QualType ElementTy;
541 
542   // Drill down to the base element type on both arrays.
543   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
544   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
545   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
546 
547   auto SrcBegin = SrcAddr.getPointer();
548   auto DestBegin = DestAddr.getPointer();
549   // Cast from pointer to array type to pointer to single element.
550   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
551   // The basic structure here is a while-do loop.
552   auto BodyBB = createBasicBlock("omp.arraycpy.body");
553   auto DoneBB = createBasicBlock("omp.arraycpy.done");
554   auto IsEmpty =
555       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
556   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
557 
558   // Enter the loop body, making that address the current address.
559   auto EntryBB = Builder.GetInsertBlock();
560   EmitBlock(BodyBB);
561 
562   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
563 
564   llvm::PHINode *SrcElementPHI =
565     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
566   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
567   Address SrcElementCurrent =
568       Address(SrcElementPHI,
569               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
570 
571   llvm::PHINode *DestElementPHI =
572     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
573   DestElementPHI->addIncoming(DestBegin, EntryBB);
574   Address DestElementCurrent =
575     Address(DestElementPHI,
576             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
577 
578   // Emit copy.
579   CopyGen(DestElementCurrent, SrcElementCurrent);
580 
581   // Shift the address forward by one element.
582   auto DestElementNext = Builder.CreateConstGEP1_32(
583       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
584   auto SrcElementNext = Builder.CreateConstGEP1_32(
585       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
586   // Check whether we've reached the end.
587   auto Done =
588       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
589   Builder.CreateCondBr(Done, DoneBB, BodyBB);
590   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
591   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
592 
593   // Done.
594   EmitBlock(DoneBB, /*IsFinished=*/true);
595 }
596 
597 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
598                                   Address SrcAddr, const VarDecl *DestVD,
599                                   const VarDecl *SrcVD, const Expr *Copy) {
600   if (OriginalType->isArrayType()) {
601     auto *BO = dyn_cast<BinaryOperator>(Copy);
602     if (BO && BO->getOpcode() == BO_Assign) {
603       // Perform simple memcpy for simple copying.
604       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
605     } else {
606       // For arrays with complex element types perform element by element
607       // copying.
608       EmitOMPAggregateAssign(
609           DestAddr, SrcAddr, OriginalType,
610           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
611             // Working with the single array element, so have to remap
612             // destination and source variables to corresponding array
613             // elements.
614             CodeGenFunction::OMPPrivateScope Remap(*this);
615             Remap.addPrivate(DestVD, [DestElement]() -> Address {
616               return DestElement;
617             });
618             Remap.addPrivate(
619                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
620             (void)Remap.Privatize();
621             EmitIgnoredExpr(Copy);
622           });
623     }
624   } else {
625     // Remap pseudo source variable to private copy.
626     CodeGenFunction::OMPPrivateScope Remap(*this);
627     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
628     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
629     (void)Remap.Privatize();
630     // Emit copying of the whole variable.
631     EmitIgnoredExpr(Copy);
632   }
633 }
634 
635 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
636                                                 OMPPrivateScope &PrivateScope) {
637   if (!HaveInsertPoint())
638     return false;
639   bool FirstprivateIsLastprivate = false;
640   llvm::DenseSet<const VarDecl *> Lastprivates;
641   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
642     for (const auto *D : C->varlists())
643       Lastprivates.insert(
644           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
645   }
646   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
647   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
648   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
649     auto IRef = C->varlist_begin();
650     auto InitsRef = C->inits().begin();
651     for (auto IInit : C->private_copies()) {
652       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
653       bool ThisFirstprivateIsLastprivate =
654           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
655       auto *CapFD = CapturesInfo.lookup(OrigVD);
656       auto *FD = CapturedStmtInfo->lookup(OrigVD);
657       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
658           !FD->getType()->isReferenceType()) {
659         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
660         ++IRef;
661         ++InitsRef;
662         continue;
663       }
664       FirstprivateIsLastprivate =
665           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
666       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
667         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
668         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
669         bool IsRegistered;
670         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
671                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
672                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
673         Address OriginalAddr = EmitLValue(&DRE).getAddress();
674         QualType Type = VD->getType();
675         if (Type->isArrayType()) {
676           // Emit VarDecl with copy init for arrays.
677           // Get the address of the original variable captured in current
678           // captured region.
679           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
680             auto Emission = EmitAutoVarAlloca(*VD);
681             auto *Init = VD->getInit();
682             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
683               // Perform simple memcpy.
684               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
685                                   Type);
686             } else {
687               EmitOMPAggregateAssign(
688                   Emission.getAllocatedAddress(), OriginalAddr, Type,
689                   [this, VDInit, Init](Address DestElement,
690                                        Address SrcElement) {
691                     // Clean up any temporaries needed by the initialization.
692                     RunCleanupsScope InitScope(*this);
693                     // Emit initialization for single element.
694                     setAddrOfLocalVar(VDInit, SrcElement);
695                     EmitAnyExprToMem(Init, DestElement,
696                                      Init->getType().getQualifiers(),
697                                      /*IsInitializer*/ false);
698                     LocalDeclMap.erase(VDInit);
699                   });
700             }
701             EmitAutoVarCleanups(Emission);
702             return Emission.getAllocatedAddress();
703           });
704         } else {
705           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
706             // Emit private VarDecl with copy init.
707             // Remap temp VDInit variable to the address of the original
708             // variable
709             // (for proper handling of captured global variables).
710             setAddrOfLocalVar(VDInit, OriginalAddr);
711             EmitDecl(*VD);
712             LocalDeclMap.erase(VDInit);
713             return GetAddrOfLocalVar(VD);
714           });
715         }
716         assert(IsRegistered &&
717                "firstprivate var already registered as private");
718         // Silence the warning about unused variable.
719         (void)IsRegistered;
720       }
721       ++IRef;
722       ++InitsRef;
723     }
724   }
725   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
726 }
727 
728 void CodeGenFunction::EmitOMPPrivateClause(
729     const OMPExecutableDirective &D,
730     CodeGenFunction::OMPPrivateScope &PrivateScope) {
731   if (!HaveInsertPoint())
732     return;
733   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
734   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
735     auto IRef = C->varlist_begin();
736     for (auto IInit : C->private_copies()) {
737       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
738       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
739         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
740         bool IsRegistered =
741             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
742               // Emit private VarDecl with copy init.
743               EmitDecl(*VD);
744               return GetAddrOfLocalVar(VD);
745             });
746         assert(IsRegistered && "private var already registered as private");
747         // Silence the warning about unused variable.
748         (void)IsRegistered;
749       }
750       ++IRef;
751     }
752   }
753 }
754 
755 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
756   if (!HaveInsertPoint())
757     return false;
758   // threadprivate_var1 = master_threadprivate_var1;
759   // operator=(threadprivate_var2, master_threadprivate_var2);
760   // ...
761   // __kmpc_barrier(&loc, global_tid);
762   llvm::DenseSet<const VarDecl *> CopiedVars;
763   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
764   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
765     auto IRef = C->varlist_begin();
766     auto ISrcRef = C->source_exprs().begin();
767     auto IDestRef = C->destination_exprs().begin();
768     for (auto *AssignOp : C->assignment_ops()) {
769       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
770       QualType Type = VD->getType();
771       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
772         // Get the address of the master variable. If we are emitting code with
773         // TLS support, the address is passed from the master as field in the
774         // captured declaration.
775         Address MasterAddr = Address::invalid();
776         if (getLangOpts().OpenMPUseTLS &&
777             getContext().getTargetInfo().isTLSSupported()) {
778           assert(CapturedStmtInfo->lookup(VD) &&
779                  "Copyin threadprivates should have been captured!");
780           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
781                           VK_LValue, (*IRef)->getExprLoc());
782           MasterAddr = EmitLValue(&DRE).getAddress();
783           LocalDeclMap.erase(VD);
784         } else {
785           MasterAddr =
786             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
787                                         : CGM.GetAddrOfGlobal(VD),
788                     getContext().getDeclAlign(VD));
789         }
790         // Get the address of the threadprivate variable.
791         Address PrivateAddr = EmitLValue(*IRef).getAddress();
792         if (CopiedVars.size() == 1) {
793           // At first check if current thread is a master thread. If it is, no
794           // need to copy data.
795           CopyBegin = createBasicBlock("copyin.not.master");
796           CopyEnd = createBasicBlock("copyin.not.master.end");
797           Builder.CreateCondBr(
798               Builder.CreateICmpNE(
799                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
800                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
801               CopyBegin, CopyEnd);
802           EmitBlock(CopyBegin);
803         }
804         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
805         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
806         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
807       }
808       ++IRef;
809       ++ISrcRef;
810       ++IDestRef;
811     }
812   }
813   if (CopyEnd) {
814     // Exit out of copying procedure for non-master thread.
815     EmitBlock(CopyEnd, /*IsFinished=*/true);
816     return true;
817   }
818   return false;
819 }
820 
821 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
822     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
823   if (!HaveInsertPoint())
824     return false;
825   bool HasAtLeastOneLastprivate = false;
826   llvm::DenseSet<const VarDecl *> SIMDLCVs;
827   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
828     auto *LoopDirective = cast<OMPLoopDirective>(&D);
829     for (auto *C : LoopDirective->counters()) {
830       SIMDLCVs.insert(
831           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
832     }
833   }
834   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
835   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
836     HasAtLeastOneLastprivate = true;
837     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
838       break;
839     auto IRef = C->varlist_begin();
840     auto IDestRef = C->destination_exprs().begin();
841     for (auto *IInit : C->private_copies()) {
842       // Keep the address of the original variable for future update at the end
843       // of the loop.
844       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
845       // Taskloops do not require additional initialization, it is done in
846       // runtime support library.
847       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
848         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
849         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
850           DeclRefExpr DRE(
851               const_cast<VarDecl *>(OrigVD),
852               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
853                   OrigVD) != nullptr,
854               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
855           return EmitLValue(&DRE).getAddress();
856         });
857         // Check if the variable is also a firstprivate: in this case IInit is
858         // not generated. Initialization of this variable will happen in codegen
859         // for 'firstprivate' clause.
860         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
861           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
862           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
863             // Emit private VarDecl with copy init.
864             EmitDecl(*VD);
865             return GetAddrOfLocalVar(VD);
866           });
867           assert(IsRegistered &&
868                  "lastprivate var already registered as private");
869           (void)IsRegistered;
870         }
871       }
872       ++IRef;
873       ++IDestRef;
874     }
875   }
876   return HasAtLeastOneLastprivate;
877 }
878 
879 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
880     const OMPExecutableDirective &D, bool NoFinals,
881     llvm::Value *IsLastIterCond) {
882   if (!HaveInsertPoint())
883     return;
884   // Emit following code:
885   // if (<IsLastIterCond>) {
886   //   orig_var1 = private_orig_var1;
887   //   ...
888   //   orig_varn = private_orig_varn;
889   // }
890   llvm::BasicBlock *ThenBB = nullptr;
891   llvm::BasicBlock *DoneBB = nullptr;
892   if (IsLastIterCond) {
893     ThenBB = createBasicBlock(".omp.lastprivate.then");
894     DoneBB = createBasicBlock(".omp.lastprivate.done");
895     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
896     EmitBlock(ThenBB);
897   }
898   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
899   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
900   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
901     auto IC = LoopDirective->counters().begin();
902     for (auto F : LoopDirective->finals()) {
903       auto *D =
904           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
905       if (NoFinals)
906         AlreadyEmittedVars.insert(D);
907       else
908         LoopCountersAndUpdates[D] = F;
909       ++IC;
910     }
911   }
912   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
913     auto IRef = C->varlist_begin();
914     auto ISrcRef = C->source_exprs().begin();
915     auto IDestRef = C->destination_exprs().begin();
916     for (auto *AssignOp : C->assignment_ops()) {
917       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
918       QualType Type = PrivateVD->getType();
919       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
920       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
921         // If lastprivate variable is a loop control variable for loop-based
922         // directive, update its value before copyin back to original
923         // variable.
924         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
925           EmitIgnoredExpr(FinalExpr);
926         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
927         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
928         // Get the address of the original variable.
929         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
930         // Get the address of the private variable.
931         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
932         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
933           PrivateAddr =
934               Address(Builder.CreateLoad(PrivateAddr),
935                       getNaturalTypeAlignment(RefTy->getPointeeType()));
936         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
937       }
938       ++IRef;
939       ++ISrcRef;
940       ++IDestRef;
941     }
942     if (auto *PostUpdate = C->getPostUpdateExpr())
943       EmitIgnoredExpr(PostUpdate);
944   }
945   if (IsLastIterCond)
946     EmitBlock(DoneBB, /*IsFinished=*/true);
947 }
948 
949 void CodeGenFunction::EmitOMPReductionClauseInit(
950     const OMPExecutableDirective &D,
951     CodeGenFunction::OMPPrivateScope &PrivateScope) {
952   if (!HaveInsertPoint())
953     return;
954   SmallVector<const Expr *, 4> Shareds;
955   SmallVector<const Expr *, 4> Privates;
956   SmallVector<const Expr *, 4> ReductionOps;
957   SmallVector<const Expr *, 4> LHSs;
958   SmallVector<const Expr *, 4> RHSs;
959   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
960     auto IPriv = C->privates().begin();
961     auto IRed = C->reduction_ops().begin();
962     auto ILHS = C->lhs_exprs().begin();
963     auto IRHS = C->rhs_exprs().begin();
964     for (const auto *Ref : C->varlists()) {
965       Shareds.emplace_back(Ref);
966       Privates.emplace_back(*IPriv);
967       ReductionOps.emplace_back(*IRed);
968       LHSs.emplace_back(*ILHS);
969       RHSs.emplace_back(*IRHS);
970       std::advance(IPriv, 1);
971       std::advance(IRed, 1);
972       std::advance(ILHS, 1);
973       std::advance(IRHS, 1);
974     }
975   }
976   ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
977   unsigned Count = 0;
978   auto ILHS = LHSs.begin();
979   auto IRHS = RHSs.begin();
980   auto IPriv = Privates.begin();
981   for (const auto *IRef : Shareds) {
982     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
983     // Emit private VarDecl with reduction init.
984     RedCG.emitSharedLValue(*this, Count);
985     RedCG.emitAggregateType(*this, Count);
986     auto Emission = EmitAutoVarAlloca(*PrivateVD);
987     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
988                              RedCG.getSharedLValue(Count),
989                              [&Emission](CodeGenFunction &CGF) {
990                                CGF.EmitAutoVarInit(Emission);
991                                return true;
992                              });
993     EmitAutoVarCleanups(Emission);
994     Address BaseAddr = RedCG.adjustPrivateAddress(
995         *this, Count, Emission.getAllocatedAddress());
996     bool IsRegistered = PrivateScope.addPrivate(
997         RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
998     assert(IsRegistered && "private var already registered as private");
999     // Silence the warning about unused variable.
1000     (void)IsRegistered;
1001 
1002     auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1003     auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1004     QualType Type = PrivateVD->getType();
1005     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1006     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1007       // Store the address of the original variable associated with the LHS
1008       // implicit variable.
1009       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1010         return RedCG.getSharedLValue(Count).getAddress();
1011       });
1012       PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1013         return GetAddrOfLocalVar(PrivateVD);
1014       });
1015     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1016                isa<ArraySubscriptExpr>(IRef)) {
1017       // Store the address of the original variable associated with the LHS
1018       // implicit variable.
1019       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1020         return RedCG.getSharedLValue(Count).getAddress();
1021       });
1022       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1023         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1024                                             ConvertTypeForMem(RHSVD->getType()),
1025                                             "rhs.begin");
1026       });
1027     } else {
1028       QualType Type = PrivateVD->getType();
1029       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1030       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1031       // Store the address of the original variable associated with the LHS
1032       // implicit variable.
1033       if (IsArray) {
1034         OriginalAddr = Builder.CreateElementBitCast(
1035             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1036       }
1037       PrivateScope.addPrivate(
1038           LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
1039       PrivateScope.addPrivate(
1040           RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
1041             return IsArray
1042                        ? Builder.CreateElementBitCast(
1043                              GetAddrOfLocalVar(PrivateVD),
1044                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1045                        : GetAddrOfLocalVar(PrivateVD);
1046           });
1047     }
1048     ++ILHS;
1049     ++IRHS;
1050     ++IPriv;
1051     ++Count;
1052   }
1053 }
1054 
1055 void CodeGenFunction::EmitOMPReductionClauseFinal(
1056     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1057   if (!HaveInsertPoint())
1058     return;
1059   llvm::SmallVector<const Expr *, 8> Privates;
1060   llvm::SmallVector<const Expr *, 8> LHSExprs;
1061   llvm::SmallVector<const Expr *, 8> RHSExprs;
1062   llvm::SmallVector<const Expr *, 8> ReductionOps;
1063   bool HasAtLeastOneReduction = false;
1064   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1065     HasAtLeastOneReduction = true;
1066     Privates.append(C->privates().begin(), C->privates().end());
1067     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1068     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1069     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1070   }
1071   if (HasAtLeastOneReduction) {
1072     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1073                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1074                       D.getDirectiveKind() == OMPD_simd;
1075     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1076     // Emit nowait reduction if nowait clause is present or directive is a
1077     // parallel directive (it always has implicit barrier).
1078     CGM.getOpenMPRuntime().emitReduction(
1079         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1080         {WithNowait, SimpleReduction, ReductionKind});
1081   }
1082 }
1083 
1084 static void emitPostUpdateForReductionClause(
1085     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1086     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1087   if (!CGF.HaveInsertPoint())
1088     return;
1089   llvm::BasicBlock *DoneBB = nullptr;
1090   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1091     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1092       if (!DoneBB) {
1093         if (auto *Cond = CondGen(CGF)) {
1094           // If the first post-update expression is found, emit conditional
1095           // block if it was requested.
1096           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1097           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1098           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1099           CGF.EmitBlock(ThenBB);
1100         }
1101       }
1102       CGF.EmitIgnoredExpr(PostUpdate);
1103     }
1104   }
1105   if (DoneBB)
1106     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1107 }
1108 
1109 namespace {
1110 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1111 /// parallel function. This is necessary for combined constructs such as
1112 /// 'distribute parallel for'
1113 typedef llvm::function_ref<void(CodeGenFunction &,
1114                                 const OMPExecutableDirective &,
1115                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1116     CodeGenBoundParametersTy;
1117 } // anonymous namespace
1118 
1119 static void emitCommonOMPParallelDirective(
1120     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1121     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1122     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1123   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1124   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1125       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1126   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1127     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1128     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1129                                          /*IgnoreResultAssign*/ true);
1130     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1131         CGF, NumThreads, NumThreadsClause->getLocStart());
1132   }
1133   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1134     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1135     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1136         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1137   }
1138   const Expr *IfCond = nullptr;
1139   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1140     if (C->getNameModifier() == OMPD_unknown ||
1141         C->getNameModifier() == OMPD_parallel) {
1142       IfCond = C->getCondition();
1143       break;
1144     }
1145   }
1146 
1147   OMPParallelScope Scope(CGF, S);
1148   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1149   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1150   // lower and upper bounds with the pragma 'for' chunking mechanism.
1151   // The following lambda takes care of appending the lower and upper bound
1152   // parameters when necessary
1153   CodeGenBoundParameters(CGF, S, CapturedVars);
1154   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1155   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1156                                               CapturedVars, IfCond);
1157 }
1158 
1159 static void emitEmptyBoundParameters(CodeGenFunction &,
1160                                      const OMPExecutableDirective &,
1161                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1162 
1163 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1164   // Emit parallel region as a standalone region.
1165   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1166     OMPPrivateScope PrivateScope(CGF);
1167     bool Copyins = CGF.EmitOMPCopyinClause(S);
1168     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1169     if (Copyins) {
1170       // Emit implicit barrier to synchronize threads and avoid data races on
1171       // propagation master's thread values of threadprivate variables to local
1172       // instances of that variables of all other implicit threads.
1173       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1174           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1175           /*ForceSimpleCall=*/true);
1176     }
1177     CGF.EmitOMPPrivateClause(S, PrivateScope);
1178     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1179     (void)PrivateScope.Privatize();
1180     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1181     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1182   };
1183   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1184                                  emitEmptyBoundParameters);
1185   emitPostUpdateForReductionClause(
1186       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1187 }
1188 
1189 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1190                                       JumpDest LoopExit) {
1191   RunCleanupsScope BodyScope(*this);
1192   // Update counters values on current iteration.
1193   for (auto I : D.updates()) {
1194     EmitIgnoredExpr(I);
1195   }
1196   // Update the linear variables.
1197   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1198     for (auto *U : C->updates())
1199       EmitIgnoredExpr(U);
1200   }
1201 
1202   // On a continue in the body, jump to the end.
1203   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1204   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1205   // Emit loop body.
1206   EmitStmt(D.getBody());
1207   // The end (updates/cleanups).
1208   EmitBlock(Continue.getBlock());
1209   BreakContinueStack.pop_back();
1210 }
1211 
1212 void CodeGenFunction::EmitOMPInnerLoop(
1213     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1214     const Expr *IncExpr,
1215     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1216     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1217   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1218 
1219   // Start the loop with a block that tests the condition.
1220   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1221   EmitBlock(CondBlock);
1222   const SourceRange &R = S.getSourceRange();
1223   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1224                  SourceLocToDebugLoc(R.getEnd()));
1225 
1226   // If there are any cleanups between here and the loop-exit scope,
1227   // create a block to stage a loop exit along.
1228   auto ExitBlock = LoopExit.getBlock();
1229   if (RequiresCleanup)
1230     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1231 
1232   auto LoopBody = createBasicBlock("omp.inner.for.body");
1233 
1234   // Emit condition.
1235   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1236   if (ExitBlock != LoopExit.getBlock()) {
1237     EmitBlock(ExitBlock);
1238     EmitBranchThroughCleanup(LoopExit);
1239   }
1240 
1241   EmitBlock(LoopBody);
1242   incrementProfileCounter(&S);
1243 
1244   // Create a block for the increment.
1245   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1246   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1247 
1248   BodyGen(*this);
1249 
1250   // Emit "IV = IV + 1" and a back-edge to the condition block.
1251   EmitBlock(Continue.getBlock());
1252   EmitIgnoredExpr(IncExpr);
1253   PostIncGen(*this);
1254   BreakContinueStack.pop_back();
1255   EmitBranch(CondBlock);
1256   LoopStack.pop();
1257   // Emit the fall-through block.
1258   EmitBlock(LoopExit.getBlock());
1259 }
1260 
1261 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1262   if (!HaveInsertPoint())
1263     return false;
1264   // Emit inits for the linear variables.
1265   bool HasLinears = false;
1266   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1267     for (auto *Init : C->inits()) {
1268       HasLinears = true;
1269       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1270       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1271         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1272         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1273         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1274                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1275                         VD->getInit()->getType(), VK_LValue,
1276                         VD->getInit()->getExprLoc());
1277         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1278                                                 VD->getType()),
1279                        /*capturedByInit=*/false);
1280         EmitAutoVarCleanups(Emission);
1281       } else
1282         EmitVarDecl(*VD);
1283     }
1284     // Emit the linear steps for the linear clauses.
1285     // If a step is not constant, it is pre-calculated before the loop.
1286     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1287       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1288         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1289         // Emit calculation of the linear step.
1290         EmitIgnoredExpr(CS);
1291       }
1292   }
1293   return HasLinears;
1294 }
1295 
1296 void CodeGenFunction::EmitOMPLinearClauseFinal(
1297     const OMPLoopDirective &D,
1298     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1299   if (!HaveInsertPoint())
1300     return;
1301   llvm::BasicBlock *DoneBB = nullptr;
1302   // Emit the final values of the linear variables.
1303   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1304     auto IC = C->varlist_begin();
1305     for (auto *F : C->finals()) {
1306       if (!DoneBB) {
1307         if (auto *Cond = CondGen(*this)) {
1308           // If the first post-update expression is found, emit conditional
1309           // block if it was requested.
1310           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1311           DoneBB = createBasicBlock(".omp.linear.pu.done");
1312           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1313           EmitBlock(ThenBB);
1314         }
1315       }
1316       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1317       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1318                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1319                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1320       Address OrigAddr = EmitLValue(&DRE).getAddress();
1321       CodeGenFunction::OMPPrivateScope VarScope(*this);
1322       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1323       (void)VarScope.Privatize();
1324       EmitIgnoredExpr(F);
1325       ++IC;
1326     }
1327     if (auto *PostUpdate = C->getPostUpdateExpr())
1328       EmitIgnoredExpr(PostUpdate);
1329   }
1330   if (DoneBB)
1331     EmitBlock(DoneBB, /*IsFinished=*/true);
1332 }
1333 
1334 static void emitAlignedClause(CodeGenFunction &CGF,
1335                               const OMPExecutableDirective &D) {
1336   if (!CGF.HaveInsertPoint())
1337     return;
1338   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1339     unsigned ClauseAlignment = 0;
1340     if (auto AlignmentExpr = Clause->getAlignment()) {
1341       auto AlignmentCI =
1342           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1343       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1344     }
1345     for (auto E : Clause->varlists()) {
1346       unsigned Alignment = ClauseAlignment;
1347       if (Alignment == 0) {
1348         // OpenMP [2.8.1, Description]
1349         // If no optional parameter is specified, implementation-defined default
1350         // alignments for SIMD instructions on the target platforms are assumed.
1351         Alignment =
1352             CGF.getContext()
1353                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1354                     E->getType()->getPointeeType()))
1355                 .getQuantity();
1356       }
1357       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1358              "alignment is not power of 2");
1359       if (Alignment != 0) {
1360         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1361         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1362       }
1363     }
1364   }
1365 }
1366 
1367 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1368     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1369   if (!HaveInsertPoint())
1370     return;
1371   auto I = S.private_counters().begin();
1372   for (auto *E : S.counters()) {
1373     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1374     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1375     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1376       // Emit var without initialization.
1377       if (!LocalDeclMap.count(PrivateVD)) {
1378         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1379         EmitAutoVarCleanups(VarEmission);
1380       }
1381       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1382                       /*RefersToEnclosingVariableOrCapture=*/false,
1383                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1384       return EmitLValue(&DRE).getAddress();
1385     });
1386     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1387         VD->hasGlobalStorage()) {
1388       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1389         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1390                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1391                         E->getType(), VK_LValue, E->getExprLoc());
1392         return EmitLValue(&DRE).getAddress();
1393       });
1394     }
1395     ++I;
1396   }
1397 }
1398 
1399 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1400                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1401                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1402   if (!CGF.HaveInsertPoint())
1403     return;
1404   {
1405     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1406     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1407     (void)PreCondScope.Privatize();
1408     // Get initial values of real counters.
1409     for (auto I : S.inits()) {
1410       CGF.EmitIgnoredExpr(I);
1411     }
1412   }
1413   // Check that loop is executed at least one time.
1414   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1415 }
1416 
1417 void CodeGenFunction::EmitOMPLinearClause(
1418     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1419   if (!HaveInsertPoint())
1420     return;
1421   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1422   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1423     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1424     for (auto *C : LoopDirective->counters()) {
1425       SIMDLCVs.insert(
1426           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1427     }
1428   }
1429   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1430     auto CurPrivate = C->privates().begin();
1431     for (auto *E : C->varlists()) {
1432       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1433       auto *PrivateVD =
1434           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1435       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1436         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1437           // Emit private VarDecl with copy init.
1438           EmitVarDecl(*PrivateVD);
1439           return GetAddrOfLocalVar(PrivateVD);
1440         });
1441         assert(IsRegistered && "linear var already registered as private");
1442         // Silence the warning about unused variable.
1443         (void)IsRegistered;
1444       } else
1445         EmitVarDecl(*PrivateVD);
1446       ++CurPrivate;
1447     }
1448   }
1449 }
1450 
1451 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1452                                      const OMPExecutableDirective &D,
1453                                      bool IsMonotonic) {
1454   if (!CGF.HaveInsertPoint())
1455     return;
1456   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1457     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1458                                  /*ignoreResult=*/true);
1459     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1460     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1461     // In presence of finite 'safelen', it may be unsafe to mark all
1462     // the memory instructions parallel, because loop-carried
1463     // dependences of 'safelen' iterations are possible.
1464     if (!IsMonotonic)
1465       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1466   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1467     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1468                                  /*ignoreResult=*/true);
1469     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1470     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1471     // In presence of finite 'safelen', it may be unsafe to mark all
1472     // the memory instructions parallel, because loop-carried
1473     // dependences of 'safelen' iterations are possible.
1474     CGF.LoopStack.setParallel(false);
1475   }
1476 }
1477 
1478 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1479                                       bool IsMonotonic) {
1480   // Walk clauses and process safelen/lastprivate.
1481   LoopStack.setParallel(!IsMonotonic);
1482   LoopStack.setVectorizeEnable(true);
1483   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1484 }
1485 
1486 void CodeGenFunction::EmitOMPSimdFinal(
1487     const OMPLoopDirective &D,
1488     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1489   if (!HaveInsertPoint())
1490     return;
1491   llvm::BasicBlock *DoneBB = nullptr;
1492   auto IC = D.counters().begin();
1493   auto IPC = D.private_counters().begin();
1494   for (auto F : D.finals()) {
1495     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1496     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1497     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1498     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1499         OrigVD->hasGlobalStorage() || CED) {
1500       if (!DoneBB) {
1501         if (auto *Cond = CondGen(*this)) {
1502           // If the first post-update expression is found, emit conditional
1503           // block if it was requested.
1504           auto *ThenBB = createBasicBlock(".omp.final.then");
1505           DoneBB = createBasicBlock(".omp.final.done");
1506           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1507           EmitBlock(ThenBB);
1508         }
1509       }
1510       Address OrigAddr = Address::invalid();
1511       if (CED)
1512         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1513       else {
1514         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1515                         /*RefersToEnclosingVariableOrCapture=*/false,
1516                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1517         OrigAddr = EmitLValue(&DRE).getAddress();
1518       }
1519       OMPPrivateScope VarScope(*this);
1520       VarScope.addPrivate(OrigVD,
1521                           [OrigAddr]() -> Address { return OrigAddr; });
1522       (void)VarScope.Privatize();
1523       EmitIgnoredExpr(F);
1524     }
1525     ++IC;
1526     ++IPC;
1527   }
1528   if (DoneBB)
1529     EmitBlock(DoneBB, /*IsFinished=*/true);
1530 }
1531 
1532 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1533                                          const OMPLoopDirective &S,
1534                                          CodeGenFunction::JumpDest LoopExit) {
1535   CGF.EmitOMPLoopBody(S, LoopExit);
1536   CGF.EmitStopPoint(&S);
1537 }
1538 
1539 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1540   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1541     OMPLoopScope PreInitScope(CGF, S);
1542     // if (PreCond) {
1543     //   for (IV in 0..LastIteration) BODY;
1544     //   <Final counter/linear vars updates>;
1545     // }
1546     //
1547 
1548     // Emit: if (PreCond) - begin.
1549     // If the condition constant folds and can be elided, avoid emitting the
1550     // whole loop.
1551     bool CondConstant;
1552     llvm::BasicBlock *ContBlock = nullptr;
1553     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1554       if (!CondConstant)
1555         return;
1556     } else {
1557       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1558       ContBlock = CGF.createBasicBlock("simd.if.end");
1559       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1560                   CGF.getProfileCount(&S));
1561       CGF.EmitBlock(ThenBlock);
1562       CGF.incrementProfileCounter(&S);
1563     }
1564 
1565     // Emit the loop iteration variable.
1566     const Expr *IVExpr = S.getIterationVariable();
1567     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1568     CGF.EmitVarDecl(*IVDecl);
1569     CGF.EmitIgnoredExpr(S.getInit());
1570 
1571     // Emit the iterations count variable.
1572     // If it is not a variable, Sema decided to calculate iterations count on
1573     // each iteration (e.g., it is foldable into a constant).
1574     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1575       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1576       // Emit calculation of the iterations count.
1577       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1578     }
1579 
1580     CGF.EmitOMPSimdInit(S);
1581 
1582     emitAlignedClause(CGF, S);
1583     (void)CGF.EmitOMPLinearClauseInit(S);
1584     {
1585       OMPPrivateScope LoopScope(CGF);
1586       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1587       CGF.EmitOMPLinearClause(S, LoopScope);
1588       CGF.EmitOMPPrivateClause(S, LoopScope);
1589       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1590       bool HasLastprivateClause =
1591           CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1592       (void)LoopScope.Privatize();
1593       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1594                            S.getInc(),
1595                            [&S](CodeGenFunction &CGF) {
1596                              CGF.EmitOMPLoopBody(S, JumpDest());
1597                              CGF.EmitStopPoint(&S);
1598                            },
1599                            [](CodeGenFunction &) {});
1600       CGF.EmitOMPSimdFinal(
1601           S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1602       // Emit final copy of the lastprivate variables at the end of loops.
1603       if (HasLastprivateClause)
1604         CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1605       CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1606       emitPostUpdateForReductionClause(
1607           CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1608     }
1609     CGF.EmitOMPLinearClauseFinal(
1610         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1611     // Emit: if (PreCond) - end.
1612     if (ContBlock) {
1613       CGF.EmitBranch(ContBlock);
1614       CGF.EmitBlock(ContBlock, true);
1615     }
1616   };
1617   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1618   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1619 }
1620 
1621 void CodeGenFunction::EmitOMPOuterLoop(
1622     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1623     CodeGenFunction::OMPPrivateScope &LoopScope,
1624     const CodeGenFunction::OMPLoopArguments &LoopArgs,
1625     const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1626     const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1627   auto &RT = CGM.getOpenMPRuntime();
1628 
1629   const Expr *IVExpr = S.getIterationVariable();
1630   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1631   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1632 
1633   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1634 
1635   // Start the loop with a block that tests the condition.
1636   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1637   EmitBlock(CondBlock);
1638   const SourceRange &R = S.getSourceRange();
1639   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1640                  SourceLocToDebugLoc(R.getEnd()));
1641 
1642   llvm::Value *BoolCondVal = nullptr;
1643   if (!DynamicOrOrdered) {
1644     // UB = min(UB, GlobalUB) or
1645     // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1646     // 'distribute parallel for')
1647     EmitIgnoredExpr(LoopArgs.EUB);
1648     // IV = LB
1649     EmitIgnoredExpr(LoopArgs.Init);
1650     // IV < UB
1651     BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1652   } else {
1653     BoolCondVal =
1654         RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1655                        LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1656   }
1657 
1658   // If there are any cleanups between here and the loop-exit scope,
1659   // create a block to stage a loop exit along.
1660   auto ExitBlock = LoopExit.getBlock();
1661   if (LoopScope.requiresCleanups())
1662     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1663 
1664   auto LoopBody = createBasicBlock("omp.dispatch.body");
1665   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1666   if (ExitBlock != LoopExit.getBlock()) {
1667     EmitBlock(ExitBlock);
1668     EmitBranchThroughCleanup(LoopExit);
1669   }
1670   EmitBlock(LoopBody);
1671 
1672   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1673   // LB for loop condition and emitted it above).
1674   if (DynamicOrOrdered)
1675     EmitIgnoredExpr(LoopArgs.Init);
1676 
1677   // Create a block for the increment.
1678   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1679   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1680 
1681   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1682   // with dynamic/guided scheduling and without ordered clause.
1683   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1684     LoopStack.setParallel(!IsMonotonic);
1685   else
1686     EmitOMPSimdInit(S, IsMonotonic);
1687 
1688   SourceLocation Loc = S.getLocStart();
1689 
1690   // when 'distribute' is not combined with a 'for':
1691   // while (idx <= UB) { BODY; ++idx; }
1692   // when 'distribute' is combined with a 'for'
1693   // (e.g. 'distribute parallel for')
1694   // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1695   EmitOMPInnerLoop(
1696       S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1697       [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1698         CodeGenLoop(CGF, S, LoopExit);
1699       },
1700       [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1701         CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1702       });
1703 
1704   EmitBlock(Continue.getBlock());
1705   BreakContinueStack.pop_back();
1706   if (!DynamicOrOrdered) {
1707     // Emit "LB = LB + Stride", "UB = UB + Stride".
1708     EmitIgnoredExpr(LoopArgs.NextLB);
1709     EmitIgnoredExpr(LoopArgs.NextUB);
1710   }
1711 
1712   EmitBranch(CondBlock);
1713   LoopStack.pop();
1714   // Emit the fall-through block.
1715   EmitBlock(LoopExit.getBlock());
1716 
1717   // Tell the runtime we are done.
1718   auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1719     if (!DynamicOrOrdered)
1720       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
1721                                                      S.getDirectiveKind());
1722   };
1723   OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1724 }
1725 
1726 void CodeGenFunction::EmitOMPForOuterLoop(
1727     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1728     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1729     const OMPLoopArguments &LoopArgs,
1730     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1731   auto &RT = CGM.getOpenMPRuntime();
1732 
1733   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1734   const bool DynamicOrOrdered =
1735       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1736 
1737   assert((Ordered ||
1738           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1739                                  LoopArgs.Chunk != nullptr)) &&
1740          "static non-chunked schedule does not need outer loop");
1741 
1742   // Emit outer loop.
1743   //
1744   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1745   // When schedule(dynamic,chunk_size) is specified, the iterations are
1746   // distributed to threads in the team in chunks as the threads request them.
1747   // Each thread executes a chunk of iterations, then requests another chunk,
1748   // until no chunks remain to be distributed. Each chunk contains chunk_size
1749   // iterations, except for the last chunk to be distributed, which may have
1750   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1751   //
1752   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1753   // to threads in the team in chunks as the executing threads request them.
1754   // Each thread executes a chunk of iterations, then requests another chunk,
1755   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1756   // each chunk is proportional to the number of unassigned iterations divided
1757   // by the number of threads in the team, decreasing to 1. For a chunk_size
1758   // with value k (greater than 1), the size of each chunk is determined in the
1759   // same way, with the restriction that the chunks do not contain fewer than k
1760   // iterations (except for the last chunk to be assigned, which may have fewer
1761   // than k iterations).
1762   //
1763   // When schedule(auto) is specified, the decision regarding scheduling is
1764   // delegated to the compiler and/or runtime system. The programmer gives the
1765   // implementation the freedom to choose any possible mapping of iterations to
1766   // threads in the team.
1767   //
1768   // When schedule(runtime) is specified, the decision regarding scheduling is
1769   // deferred until run time, and the schedule and chunk size are taken from the
1770   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1771   // implementation defined
1772   //
1773   // while(__kmpc_dispatch_next(&LB, &UB)) {
1774   //   idx = LB;
1775   //   while (idx <= UB) { BODY; ++idx;
1776   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1777   //   } // inner loop
1778   // }
1779   //
1780   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1781   // When schedule(static, chunk_size) is specified, iterations are divided into
1782   // chunks of size chunk_size, and the chunks are assigned to the threads in
1783   // the team in a round-robin fashion in the order of the thread number.
1784   //
1785   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1786   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1787   //   LB = LB + ST;
1788   //   UB = UB + ST;
1789   // }
1790   //
1791 
1792   const Expr *IVExpr = S.getIterationVariable();
1793   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1794   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1795 
1796   if (DynamicOrOrdered) {
1797     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1798     llvm::Value *LBVal = DispatchBounds.first;
1799     llvm::Value *UBVal = DispatchBounds.second;
1800     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1801                                                              LoopArgs.Chunk};
1802     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1803                            IVSigned, Ordered, DipatchRTInputValues);
1804   } else {
1805     CGOpenMPRuntime::StaticRTInput StaticInit(
1806         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1807         LoopArgs.ST, LoopArgs.Chunk);
1808     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1809                          ScheduleKind, StaticInit);
1810   }
1811 
1812   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1813                                     const unsigned IVSize,
1814                                     const bool IVSigned) {
1815     if (Ordered) {
1816       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1817                                                             IVSigned);
1818     }
1819   };
1820 
1821   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1822                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1823   OuterLoopArgs.IncExpr = S.getInc();
1824   OuterLoopArgs.Init = S.getInit();
1825   OuterLoopArgs.Cond = S.getCond();
1826   OuterLoopArgs.NextLB = S.getNextLowerBound();
1827   OuterLoopArgs.NextUB = S.getNextUpperBound();
1828   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1829                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1830 }
1831 
1832 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1833                              const unsigned IVSize, const bool IVSigned) {}
1834 
1835 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1836     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1837     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1838     const CodeGenLoopTy &CodeGenLoopContent) {
1839 
1840   auto &RT = CGM.getOpenMPRuntime();
1841 
1842   // Emit outer loop.
1843   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1844   // dynamic
1845   //
1846 
1847   const Expr *IVExpr = S.getIterationVariable();
1848   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1849   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1850 
1851   CGOpenMPRuntime::StaticRTInput StaticInit(
1852       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1853       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1854   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1855 
1856   // for combined 'distribute' and 'for' the increment expression of distribute
1857   // is store in DistInc. For 'distribute' alone, it is in Inc.
1858   Expr *IncExpr;
1859   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1860     IncExpr = S.getDistInc();
1861   else
1862     IncExpr = S.getInc();
1863 
1864   // this routine is shared by 'omp distribute parallel for' and
1865   // 'omp distribute': select the right EUB expression depending on the
1866   // directive
1867   OMPLoopArguments OuterLoopArgs;
1868   OuterLoopArgs.LB = LoopArgs.LB;
1869   OuterLoopArgs.UB = LoopArgs.UB;
1870   OuterLoopArgs.ST = LoopArgs.ST;
1871   OuterLoopArgs.IL = LoopArgs.IL;
1872   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1873   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1874                           ? S.getCombinedEnsureUpperBound()
1875                           : S.getEnsureUpperBound();
1876   OuterLoopArgs.IncExpr = IncExpr;
1877   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1878                            ? S.getCombinedInit()
1879                            : S.getInit();
1880   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1881                            ? S.getCombinedCond()
1882                            : S.getCond();
1883   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1884                              ? S.getCombinedNextLowerBound()
1885                              : S.getNextLowerBound();
1886   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1887                              ? S.getCombinedNextUpperBound()
1888                              : S.getNextUpperBound();
1889 
1890   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1891                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1892                    emitEmptyOrdered);
1893 }
1894 
1895 /// Emit a helper variable and return corresponding lvalue.
1896 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1897                                const DeclRefExpr *Helper) {
1898   auto VDecl = cast<VarDecl>(Helper->getDecl());
1899   CGF.EmitVarDecl(*VDecl);
1900   return CGF.EmitLValue(Helper);
1901 }
1902 
1903 static std::pair<LValue, LValue>
1904 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1905                                      const OMPExecutableDirective &S) {
1906   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1907   LValue LB =
1908       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1909   LValue UB =
1910       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1911 
1912   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1913   // parallel for') we need to use the 'distribute'
1914   // chunk lower and upper bounds rather than the whole loop iteration
1915   // space. These are parameters to the outlined function for 'parallel'
1916   // and we copy the bounds of the previous schedule into the
1917   // the current ones.
1918   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1919   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1920   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1921   PrevLBVal = CGF.EmitScalarConversion(
1922       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1923       LS.getIterationVariable()->getType(), SourceLocation());
1924   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1925   PrevUBVal = CGF.EmitScalarConversion(
1926       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1927       LS.getIterationVariable()->getType(), SourceLocation());
1928 
1929   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1930   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1931 
1932   return {LB, UB};
1933 }
1934 
1935 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1936 /// we need to use the LB and UB expressions generated by the worksharing
1937 /// code generation support, whereas in non combined situations we would
1938 /// just emit 0 and the LastIteration expression
1939 /// This function is necessary due to the difference of the LB and UB
1940 /// types for the RT emission routines for 'for_static_init' and
1941 /// 'for_dispatch_init'
1942 static std::pair<llvm::Value *, llvm::Value *>
1943 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1944                                         const OMPExecutableDirective &S,
1945                                         Address LB, Address UB) {
1946   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1947   const Expr *IVExpr = LS.getIterationVariable();
1948   // when implementing a dynamic schedule for a 'for' combined with a
1949   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1950   // is not normalized as each team only executes its own assigned
1951   // distribute chunk
1952   QualType IteratorTy = IVExpr->getType();
1953   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1954                                             SourceLocation());
1955   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1956                                             SourceLocation());
1957   return {LBVal, UBVal};
1958 }
1959 
1960 static void emitDistributeParallelForDistributeInnerBoundParams(
1961     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1962     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1963   const auto &Dir = cast<OMPLoopDirective>(S);
1964   LValue LB =
1965       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1966   auto LBCast = CGF.Builder.CreateIntCast(
1967       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1968   CapturedVars.push_back(LBCast);
1969   LValue UB =
1970       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1971 
1972   auto UBCast = CGF.Builder.CreateIntCast(
1973       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1974   CapturedVars.push_back(UBCast);
1975 }
1976 
1977 static void
1978 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
1979                                  const OMPLoopDirective &S,
1980                                  CodeGenFunction::JumpDest LoopExit) {
1981   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
1982                                          PrePostActionTy &) {
1983     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
1984                                emitDistributeParallelForInnerBounds,
1985                                emitDistributeParallelForDispatchBounds);
1986   };
1987 
1988   emitCommonOMPParallelDirective(
1989       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
1990       emitDistributeParallelForDistributeInnerBoundParams);
1991 }
1992 
1993 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1994     const OMPDistributeParallelForDirective &S) {
1995   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1996     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
1997                               S.getDistInc());
1998   };
1999   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2000   OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
2001                                   /*HasCancel=*/false);
2002   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2003                                               /*HasCancel=*/false);
2004 }
2005 
2006 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2007     const OMPDistributeParallelForSimdDirective &S) {
2008   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2009   CGM.getOpenMPRuntime().emitInlinedDirective(
2010       *this, OMPD_distribute_parallel_for_simd,
2011       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2012         OMPLoopScope PreInitScope(CGF, S);
2013         CGF.EmitStmt(
2014             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2015       });
2016 }
2017 
2018 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2019     const OMPDistributeSimdDirective &S) {
2020   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2021   CGM.getOpenMPRuntime().emitInlinedDirective(
2022       *this, OMPD_distribute_simd,
2023       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2024         OMPLoopScope PreInitScope(CGF, S);
2025         CGF.EmitStmt(
2026             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2027       });
2028 }
2029 
2030 void CodeGenFunction::EmitOMPTargetSimdDirective(
2031     const OMPTargetSimdDirective &S) {
2032   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2033   CGM.getOpenMPRuntime().emitInlinedDirective(
2034       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2035         OMPLoopScope PreInitScope(CGF, S);
2036         CGF.EmitStmt(
2037             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2038       });
2039 }
2040 
2041 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2042     const OMPTeamsDistributeSimdDirective &S) {
2043   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2044   CGM.getOpenMPRuntime().emitInlinedDirective(
2045       *this, OMPD_teams_distribute_simd,
2046       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2047         OMPLoopScope PreInitScope(CGF, S);
2048         CGF.EmitStmt(
2049             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2050       });
2051 }
2052 
2053 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2054     const OMPTeamsDistributeParallelForSimdDirective &S) {
2055   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2056   CGM.getOpenMPRuntime().emitInlinedDirective(
2057       *this, OMPD_teams_distribute_parallel_for_simd,
2058       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2059         OMPLoopScope PreInitScope(CGF, S);
2060         CGF.EmitStmt(
2061             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2062       });
2063 }
2064 
2065 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2066     const OMPTeamsDistributeParallelForDirective &S) {
2067   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2068   CGM.getOpenMPRuntime().emitInlinedDirective(
2069       *this, OMPD_teams_distribute_parallel_for,
2070       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2071         OMPLoopScope PreInitScope(CGF, S);
2072         CGF.EmitStmt(
2073             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2074       });
2075 }
2076 
2077 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2078     const OMPTargetTeamsDistributeDirective &S) {
2079   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2080   CGM.getOpenMPRuntime().emitInlinedDirective(
2081       *this, OMPD_target_teams_distribute,
2082       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2083         CGF.EmitStmt(
2084             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2085       });
2086 }
2087 
2088 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2089     const OMPTargetTeamsDistributeParallelForDirective &S) {
2090   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2091   CGM.getOpenMPRuntime().emitInlinedDirective(
2092       *this, OMPD_target_teams_distribute_parallel_for,
2093       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2094         CGF.EmitStmt(
2095             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2096       });
2097 }
2098 
2099 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2100     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2101   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2102   CGM.getOpenMPRuntime().emitInlinedDirective(
2103       *this, OMPD_target_teams_distribute_parallel_for_simd,
2104       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2105         CGF.EmitStmt(
2106             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2107       });
2108 }
2109 
2110 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2111     const OMPTargetTeamsDistributeSimdDirective &S) {
2112   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2113   CGM.getOpenMPRuntime().emitInlinedDirective(
2114       *this, OMPD_target_teams_distribute_simd,
2115       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2116         CGF.EmitStmt(
2117             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2118       });
2119 }
2120 
2121 namespace {
2122   struct ScheduleKindModifiersTy {
2123     OpenMPScheduleClauseKind Kind;
2124     OpenMPScheduleClauseModifier M1;
2125     OpenMPScheduleClauseModifier M2;
2126     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2127                             OpenMPScheduleClauseModifier M1,
2128                             OpenMPScheduleClauseModifier M2)
2129         : Kind(Kind), M1(M1), M2(M2) {}
2130   };
2131 } // namespace
2132 
2133 bool CodeGenFunction::EmitOMPWorksharingLoop(
2134     const OMPLoopDirective &S, Expr *EUB,
2135     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2136     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2137   // Emit the loop iteration variable.
2138   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2139   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2140   EmitVarDecl(*IVDecl);
2141 
2142   // Emit the iterations count variable.
2143   // If it is not a variable, Sema decided to calculate iterations count on each
2144   // iteration (e.g., it is foldable into a constant).
2145   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2146     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2147     // Emit calculation of the iterations count.
2148     EmitIgnoredExpr(S.getCalcLastIteration());
2149   }
2150 
2151   auto &RT = CGM.getOpenMPRuntime();
2152 
2153   bool HasLastprivateClause;
2154   // Check pre-condition.
2155   {
2156     OMPLoopScope PreInitScope(*this, S);
2157     // Skip the entire loop if we don't meet the precondition.
2158     // If the condition constant folds and can be elided, avoid emitting the
2159     // whole loop.
2160     bool CondConstant;
2161     llvm::BasicBlock *ContBlock = nullptr;
2162     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2163       if (!CondConstant)
2164         return false;
2165     } else {
2166       auto *ThenBlock = createBasicBlock("omp.precond.then");
2167       ContBlock = createBasicBlock("omp.precond.end");
2168       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2169                   getProfileCount(&S));
2170       EmitBlock(ThenBlock);
2171       incrementProfileCounter(&S);
2172     }
2173 
2174     bool Ordered = false;
2175     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2176       if (OrderedClause->getNumForLoops())
2177         RT.emitDoacrossInit(*this, S);
2178       else
2179         Ordered = true;
2180     }
2181 
2182     llvm::DenseSet<const Expr *> EmittedFinals;
2183     emitAlignedClause(*this, S);
2184     bool HasLinears = EmitOMPLinearClauseInit(S);
2185     // Emit helper vars inits.
2186 
2187     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2188     LValue LB = Bounds.first;
2189     LValue UB = Bounds.second;
2190     LValue ST =
2191         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2192     LValue IL =
2193         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2194 
2195     // Emit 'then' code.
2196     {
2197       OMPPrivateScope LoopScope(*this);
2198       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2199         // Emit implicit barrier to synchronize threads and avoid data races on
2200         // initialization of firstprivate variables and post-update of
2201         // lastprivate variables.
2202         CGM.getOpenMPRuntime().emitBarrierCall(
2203             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2204             /*ForceSimpleCall=*/true);
2205       }
2206       EmitOMPPrivateClause(S, LoopScope);
2207       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2208       EmitOMPReductionClauseInit(S, LoopScope);
2209       EmitOMPPrivateLoopCounters(S, LoopScope);
2210       EmitOMPLinearClause(S, LoopScope);
2211       (void)LoopScope.Privatize();
2212 
2213       // Detect the loop schedule kind and chunk.
2214       llvm::Value *Chunk = nullptr;
2215       OpenMPScheduleTy ScheduleKind;
2216       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2217         ScheduleKind.Schedule = C->getScheduleKind();
2218         ScheduleKind.M1 = C->getFirstScheduleModifier();
2219         ScheduleKind.M2 = C->getSecondScheduleModifier();
2220         if (const auto *Ch = C->getChunkSize()) {
2221           Chunk = EmitScalarExpr(Ch);
2222           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2223                                        S.getIterationVariable()->getType(),
2224                                        S.getLocStart());
2225         }
2226       }
2227       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2228       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2229       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2230       // If the static schedule kind is specified or if the ordered clause is
2231       // specified, and if no monotonic modifier is specified, the effect will
2232       // be as if the monotonic modifier was specified.
2233       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2234                                 /* Chunked */ Chunk != nullptr) &&
2235           !Ordered) {
2236         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2237           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2238         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2239         // When no chunk_size is specified, the iteration space is divided into
2240         // chunks that are approximately equal in size, and at most one chunk is
2241         // distributed to each thread. Note that the size of the chunks is
2242         // unspecified in this case.
2243         CGOpenMPRuntime::StaticRTInput StaticInit(
2244             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2245             UB.getAddress(), ST.getAddress());
2246         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2247                              ScheduleKind, StaticInit);
2248         auto LoopExit =
2249             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2250         // UB = min(UB, GlobalUB);
2251         EmitIgnoredExpr(S.getEnsureUpperBound());
2252         // IV = LB;
2253         EmitIgnoredExpr(S.getInit());
2254         // while (idx <= UB) { BODY; ++idx; }
2255         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2256                          S.getInc(),
2257                          [&S, LoopExit](CodeGenFunction &CGF) {
2258                            CGF.EmitOMPLoopBody(S, LoopExit);
2259                            CGF.EmitStopPoint(&S);
2260                          },
2261                          [](CodeGenFunction &) {});
2262         EmitBlock(LoopExit.getBlock());
2263         // Tell the runtime we are done.
2264         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2265           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2266                                                          S.getDirectiveKind());
2267         };
2268         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2269       } else {
2270         const bool IsMonotonic =
2271             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2272             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2273             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2274             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2275         // Emit the outer loop, which requests its work chunk [LB..UB] from
2276         // runtime and runs the inner loop to process it.
2277         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2278                                              ST.getAddress(), IL.getAddress(),
2279                                              Chunk, EUB);
2280         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2281                             LoopArguments, CGDispatchBounds);
2282       }
2283       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2284         EmitOMPSimdFinal(S,
2285                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2286                            return CGF.Builder.CreateIsNotNull(
2287                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2288                          });
2289       }
2290       EmitOMPReductionClauseFinal(
2291           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2292                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2293                  : /*Parallel only*/ OMPD_parallel);
2294       // Emit post-update of the reduction variables if IsLastIter != 0.
2295       emitPostUpdateForReductionClause(
2296           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2297             return CGF.Builder.CreateIsNotNull(
2298                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2299           });
2300       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2301       if (HasLastprivateClause)
2302         EmitOMPLastprivateClauseFinal(
2303             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2304             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2305     }
2306     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2307       return CGF.Builder.CreateIsNotNull(
2308           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2309     });
2310     // We're now done with the loop, so jump to the continuation block.
2311     if (ContBlock) {
2312       EmitBranch(ContBlock);
2313       EmitBlock(ContBlock, true);
2314     }
2315   }
2316   return HasLastprivateClause;
2317 }
2318 
2319 /// The following two functions generate expressions for the loop lower
2320 /// and upper bounds in case of static and dynamic (dispatch) schedule
2321 /// of the associated 'for' or 'distribute' loop.
2322 static std::pair<LValue, LValue>
2323 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2324   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2325   LValue LB =
2326       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2327   LValue UB =
2328       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2329   return {LB, UB};
2330 }
2331 
2332 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2333 /// consider the lower and upper bound expressions generated by the
2334 /// worksharing loop support, but we use 0 and the iteration space size as
2335 /// constants
2336 static std::pair<llvm::Value *, llvm::Value *>
2337 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2338                           Address LB, Address UB) {
2339   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2340   const Expr *IVExpr = LS.getIterationVariable();
2341   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2342   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2343   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2344   return {LBVal, UBVal};
2345 }
2346 
2347 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2348   bool HasLastprivates = false;
2349   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2350                                           PrePostActionTy &) {
2351     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2352     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2353                                                  emitForLoopBounds,
2354                                                  emitDispatchForLoopBounds);
2355   };
2356   {
2357     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2358     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2359                                                 S.hasCancel());
2360   }
2361 
2362   // Emit an implicit barrier at the end.
2363   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2364     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2365   }
2366 }
2367 
2368 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2369   bool HasLastprivates = false;
2370   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2371                                           PrePostActionTy &) {
2372     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2373                                                  emitForLoopBounds,
2374                                                  emitDispatchForLoopBounds);
2375   };
2376   {
2377     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2378     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2379   }
2380 
2381   // Emit an implicit barrier at the end.
2382   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2383     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2384   }
2385 }
2386 
2387 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2388                                 const Twine &Name,
2389                                 llvm::Value *Init = nullptr) {
2390   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2391   if (Init)
2392     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2393   return LVal;
2394 }
2395 
2396 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2397   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
2398   auto *CS = dyn_cast<CompoundStmt>(Stmt);
2399   bool HasLastprivates = false;
2400   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
2401                                                     PrePostActionTy &) {
2402     auto &C = CGF.CGM.getContext();
2403     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2404     // Emit helper vars inits.
2405     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2406                                   CGF.Builder.getInt32(0));
2407     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2408                                       : CGF.Builder.getInt32(0);
2409     LValue UB =
2410         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2411     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2412                                   CGF.Builder.getInt32(1));
2413     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2414                                   CGF.Builder.getInt32(0));
2415     // Loop counter.
2416     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2417     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2418     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2419     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2420     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2421     // Generate condition for loop.
2422     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2423                         OK_Ordinary, S.getLocStart(), FPOptions());
2424     // Increment for loop counter.
2425     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2426                       S.getLocStart());
2427     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2428       // Iterate through all sections and emit a switch construct:
2429       // switch (IV) {
2430       //   case 0:
2431       //     <SectionStmt[0]>;
2432       //     break;
2433       // ...
2434       //   case <NumSection> - 1:
2435       //     <SectionStmt[<NumSection> - 1]>;
2436       //     break;
2437       // }
2438       // .omp.sections.exit:
2439       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2440       auto *SwitchStmt = CGF.Builder.CreateSwitch(
2441           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2442           CS == nullptr ? 1 : CS->size());
2443       if (CS) {
2444         unsigned CaseNumber = 0;
2445         for (auto *SubStmt : CS->children()) {
2446           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2447           CGF.EmitBlock(CaseBB);
2448           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2449           CGF.EmitStmt(SubStmt);
2450           CGF.EmitBranch(ExitBB);
2451           ++CaseNumber;
2452         }
2453       } else {
2454         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2455         CGF.EmitBlock(CaseBB);
2456         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2457         CGF.EmitStmt(Stmt);
2458         CGF.EmitBranch(ExitBB);
2459       }
2460       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2461     };
2462 
2463     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2464     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2465       // Emit implicit barrier to synchronize threads and avoid data races on
2466       // initialization of firstprivate variables and post-update of lastprivate
2467       // variables.
2468       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2469           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2470           /*ForceSimpleCall=*/true);
2471     }
2472     CGF.EmitOMPPrivateClause(S, LoopScope);
2473     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2474     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2475     (void)LoopScope.Privatize();
2476 
2477     // Emit static non-chunked loop.
2478     OpenMPScheduleTy ScheduleKind;
2479     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2480     CGOpenMPRuntime::StaticRTInput StaticInit(
2481         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
2482         LB.getAddress(), UB.getAddress(), ST.getAddress());
2483     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2484         CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
2485     // UB = min(UB, GlobalUB);
2486     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2487     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2488         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2489     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2490     // IV = LB;
2491     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2492     // while (idx <= UB) { BODY; ++idx; }
2493     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2494                          [](CodeGenFunction &) {});
2495     // Tell the runtime we are done.
2496     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2497       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
2498                                                      S.getDirectiveKind());
2499     };
2500     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2501     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2502     // Emit post-update of the reduction variables if IsLastIter != 0.
2503     emitPostUpdateForReductionClause(
2504         CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2505           return CGF.Builder.CreateIsNotNull(
2506               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2507         });
2508 
2509     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2510     if (HasLastprivates)
2511       CGF.EmitOMPLastprivateClauseFinal(
2512           S, /*NoFinals=*/false,
2513           CGF.Builder.CreateIsNotNull(
2514               CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2515   };
2516 
2517   bool HasCancel = false;
2518   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2519     HasCancel = OSD->hasCancel();
2520   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2521     HasCancel = OPSD->hasCancel();
2522   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2523   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2524                                               HasCancel);
2525   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2526   // clause. Otherwise the barrier will be generated by the codegen for the
2527   // directive.
2528   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2529     // Emit implicit barrier to synchronize threads and avoid data races on
2530     // initialization of firstprivate variables.
2531     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2532                                            OMPD_unknown);
2533   }
2534 }
2535 
2536 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2537   {
2538     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2539     EmitSections(S);
2540   }
2541   // Emit an implicit barrier at the end.
2542   if (!S.getSingleClause<OMPNowaitClause>()) {
2543     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2544                                            OMPD_sections);
2545   }
2546 }
2547 
2548 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2549   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2550     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2551   };
2552   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2553   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2554                                               S.hasCancel());
2555 }
2556 
2557 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2558   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2559   llvm::SmallVector<const Expr *, 8> DestExprs;
2560   llvm::SmallVector<const Expr *, 8> SrcExprs;
2561   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2562   // Check if there are any 'copyprivate' clauses associated with this
2563   // 'single' construct.
2564   // Build a list of copyprivate variables along with helper expressions
2565   // (<source>, <destination>, <destination>=<source> expressions)
2566   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2567     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2568     DestExprs.append(C->destination_exprs().begin(),
2569                      C->destination_exprs().end());
2570     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2571     AssignmentOps.append(C->assignment_ops().begin(),
2572                          C->assignment_ops().end());
2573   }
2574   // Emit code for 'single' region along with 'copyprivate' clauses
2575   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2576     Action.Enter(CGF);
2577     OMPPrivateScope SingleScope(CGF);
2578     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2579     CGF.EmitOMPPrivateClause(S, SingleScope);
2580     (void)SingleScope.Privatize();
2581     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2582   };
2583   {
2584     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2585     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2586                                             CopyprivateVars, DestExprs,
2587                                             SrcExprs, AssignmentOps);
2588   }
2589   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2590   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2591   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2592     CGM.getOpenMPRuntime().emitBarrierCall(
2593         *this, S.getLocStart(),
2594         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2595   }
2596 }
2597 
2598 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2599   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2600     Action.Enter(CGF);
2601     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2602   };
2603   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2604   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2605 }
2606 
2607 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2608   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2609     Action.Enter(CGF);
2610     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2611   };
2612   Expr *Hint = nullptr;
2613   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2614     Hint = HintClause->getHint();
2615   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2616   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2617                                             S.getDirectiveName().getAsString(),
2618                                             CodeGen, S.getLocStart(), Hint);
2619 }
2620 
2621 void CodeGenFunction::EmitOMPParallelForDirective(
2622     const OMPParallelForDirective &S) {
2623   // Emit directive as a combined directive that consists of two implicit
2624   // directives: 'parallel' with 'for' directive.
2625   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2626     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2627     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2628                                emitDispatchForLoopBounds);
2629   };
2630   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2631                                  emitEmptyBoundParameters);
2632 }
2633 
2634 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2635     const OMPParallelForSimdDirective &S) {
2636   // Emit directive as a combined directive that consists of two implicit
2637   // directives: 'parallel' with 'for' directive.
2638   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2639     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2640                                emitDispatchForLoopBounds);
2641   };
2642   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2643                                  emitEmptyBoundParameters);
2644 }
2645 
2646 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2647     const OMPParallelSectionsDirective &S) {
2648   // Emit directive as a combined directive that consists of two implicit
2649   // directives: 'parallel' with 'sections' directive.
2650   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2651     CGF.EmitSections(S);
2652   };
2653   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2654                                  emitEmptyBoundParameters);
2655 }
2656 
2657 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2658                                                 const RegionCodeGenTy &BodyGen,
2659                                                 const TaskGenTy &TaskGen,
2660                                                 OMPTaskDataTy &Data) {
2661   // Emit outlined function for task construct.
2662   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2663   auto *I = CS->getCapturedDecl()->param_begin();
2664   auto *PartId = std::next(I);
2665   auto *TaskT = std::next(I, 4);
2666   // Check if the task is final
2667   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2668     // If the condition constant folds and can be elided, try to avoid emitting
2669     // the condition and the dead arm of the if/else.
2670     auto *Cond = Clause->getCondition();
2671     bool CondConstant;
2672     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2673       Data.Final.setInt(CondConstant);
2674     else
2675       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2676   } else {
2677     // By default the task is not final.
2678     Data.Final.setInt(/*IntVal=*/false);
2679   }
2680   // Check if the task has 'priority' clause.
2681   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2682     auto *Prio = Clause->getPriority();
2683     Data.Priority.setInt(/*IntVal=*/true);
2684     Data.Priority.setPointer(EmitScalarConversion(
2685         EmitScalarExpr(Prio), Prio->getType(),
2686         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2687         Prio->getExprLoc()));
2688   }
2689   // The first function argument for tasks is a thread id, the second one is a
2690   // part id (0 for tied tasks, >=0 for untied task).
2691   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2692   // Get list of private variables.
2693   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2694     auto IRef = C->varlist_begin();
2695     for (auto *IInit : C->private_copies()) {
2696       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2697       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2698         Data.PrivateVars.push_back(*IRef);
2699         Data.PrivateCopies.push_back(IInit);
2700       }
2701       ++IRef;
2702     }
2703   }
2704   EmittedAsPrivate.clear();
2705   // Get list of firstprivate variables.
2706   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2707     auto IRef = C->varlist_begin();
2708     auto IElemInitRef = C->inits().begin();
2709     for (auto *IInit : C->private_copies()) {
2710       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2711       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2712         Data.FirstprivateVars.push_back(*IRef);
2713         Data.FirstprivateCopies.push_back(IInit);
2714         Data.FirstprivateInits.push_back(*IElemInitRef);
2715       }
2716       ++IRef;
2717       ++IElemInitRef;
2718     }
2719   }
2720   // Get list of lastprivate variables (for taskloops).
2721   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2722   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2723     auto IRef = C->varlist_begin();
2724     auto ID = C->destination_exprs().begin();
2725     for (auto *IInit : C->private_copies()) {
2726       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2727       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2728         Data.LastprivateVars.push_back(*IRef);
2729         Data.LastprivateCopies.push_back(IInit);
2730       }
2731       LastprivateDstsOrigs.insert(
2732           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2733            cast<DeclRefExpr>(*IRef)});
2734       ++IRef;
2735       ++ID;
2736     }
2737   }
2738   SmallVector<const Expr *, 4> LHSs;
2739   SmallVector<const Expr *, 4> RHSs;
2740   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2741     auto IPriv = C->privates().begin();
2742     auto IRed = C->reduction_ops().begin();
2743     auto ILHS = C->lhs_exprs().begin();
2744     auto IRHS = C->rhs_exprs().begin();
2745     for (const auto *Ref : C->varlists()) {
2746       Data.ReductionVars.emplace_back(Ref);
2747       Data.ReductionCopies.emplace_back(*IPriv);
2748       Data.ReductionOps.emplace_back(*IRed);
2749       LHSs.emplace_back(*ILHS);
2750       RHSs.emplace_back(*IRHS);
2751       std::advance(IPriv, 1);
2752       std::advance(IRed, 1);
2753       std::advance(ILHS, 1);
2754       std::advance(IRHS, 1);
2755     }
2756   }
2757   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2758       *this, S.getLocStart(), LHSs, RHSs, Data);
2759   // Build list of dependences.
2760   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2761     for (auto *IRef : C->varlists())
2762       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2763   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2764       CodeGenFunction &CGF, PrePostActionTy &Action) {
2765     // Set proper addresses for generated private copies.
2766     OMPPrivateScope Scope(CGF);
2767     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2768         !Data.LastprivateVars.empty()) {
2769       enum { PrivatesParam = 2, CopyFnParam = 3 };
2770       auto *CopyFn = CGF.Builder.CreateLoad(
2771           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2772       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2773           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2774       // Map privates.
2775       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2776       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2777       CallArgs.push_back(PrivatesPtr);
2778       for (auto *E : Data.PrivateVars) {
2779         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2780         Address PrivatePtr = CGF.CreateMemTemp(
2781             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2782         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2783         CallArgs.push_back(PrivatePtr.getPointer());
2784       }
2785       for (auto *E : Data.FirstprivateVars) {
2786         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2787         Address PrivatePtr =
2788             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2789                               ".firstpriv.ptr.addr");
2790         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2791         CallArgs.push_back(PrivatePtr.getPointer());
2792       }
2793       for (auto *E : Data.LastprivateVars) {
2794         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2795         Address PrivatePtr =
2796             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2797                               ".lastpriv.ptr.addr");
2798         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2799         CallArgs.push_back(PrivatePtr.getPointer());
2800       }
2801       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2802                                                           CopyFn, CallArgs);
2803       for (auto &&Pair : LastprivateDstsOrigs) {
2804         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2805         DeclRefExpr DRE(
2806             const_cast<VarDecl *>(OrigVD),
2807             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2808                 OrigVD) != nullptr,
2809             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2810         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2811           return CGF.EmitLValue(&DRE).getAddress();
2812         });
2813       }
2814       for (auto &&Pair : PrivatePtrs) {
2815         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2816                             CGF.getContext().getDeclAlign(Pair.first));
2817         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2818       }
2819     }
2820     if (Data.Reductions) {
2821       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2822       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2823                              Data.ReductionOps);
2824       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2825           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2826       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2827         RedCG.emitSharedLValue(CGF, Cnt);
2828         RedCG.emitAggregateType(CGF, Cnt);
2829         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2830             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2831         Replacement =
2832             Address(CGF.EmitScalarConversion(
2833                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2834                         CGF.getContext().getPointerType(
2835                             Data.ReductionCopies[Cnt]->getType()),
2836                         SourceLocation()),
2837                     Replacement.getAlignment());
2838         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2839         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2840                          [Replacement]() { return Replacement; });
2841         // FIXME: This must removed once the runtime library is fixed.
2842         // Emit required threadprivate variables for
2843         // initilizer/combiner/finalizer.
2844         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2845                                                            RedCG, Cnt);
2846       }
2847     }
2848     // Privatize all private variables except for in_reduction items.
2849     (void)Scope.Privatize();
2850     SmallVector<const Expr *, 4> InRedVars;
2851     SmallVector<const Expr *, 4> InRedPrivs;
2852     SmallVector<const Expr *, 4> InRedOps;
2853     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2854     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2855       auto IPriv = C->privates().begin();
2856       auto IRed = C->reduction_ops().begin();
2857       auto ITD = C->taskgroup_descriptors().begin();
2858       for (const auto *Ref : C->varlists()) {
2859         InRedVars.emplace_back(Ref);
2860         InRedPrivs.emplace_back(*IPriv);
2861         InRedOps.emplace_back(*IRed);
2862         TaskgroupDescriptors.emplace_back(*ITD);
2863         std::advance(IPriv, 1);
2864         std::advance(IRed, 1);
2865         std::advance(ITD, 1);
2866       }
2867     }
2868     // Privatize in_reduction items here, because taskgroup descriptors must be
2869     // privatized earlier.
2870     OMPPrivateScope InRedScope(CGF);
2871     if (!InRedVars.empty()) {
2872       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2873       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2874         RedCG.emitSharedLValue(CGF, Cnt);
2875         RedCG.emitAggregateType(CGF, Cnt);
2876         // The taskgroup descriptor variable is always implicit firstprivate and
2877         // privatized already during procoessing of the firstprivates.
2878         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2879             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2880         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2881             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2882         Replacement = Address(
2883             CGF.EmitScalarConversion(
2884                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2885                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2886                 SourceLocation()),
2887             Replacement.getAlignment());
2888         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2889         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2890                               [Replacement]() { return Replacement; });
2891         // FIXME: This must removed once the runtime library is fixed.
2892         // Emit required threadprivate variables for
2893         // initilizer/combiner/finalizer.
2894         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2895                                                            RedCG, Cnt);
2896       }
2897     }
2898     (void)InRedScope.Privatize();
2899 
2900     Action.Enter(CGF);
2901     BodyGen(CGF);
2902   };
2903   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2904       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2905       Data.NumberOfParts);
2906   OMPLexicalScope Scope(*this, S);
2907   TaskGen(*this, OutlinedFn, Data);
2908 }
2909 
2910 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2911   // Emit outlined function for task construct.
2912   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2913   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2914   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2915   const Expr *IfCond = nullptr;
2916   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2917     if (C->getNameModifier() == OMPD_unknown ||
2918         C->getNameModifier() == OMPD_task) {
2919       IfCond = C->getCondition();
2920       break;
2921     }
2922   }
2923 
2924   OMPTaskDataTy Data;
2925   // Check if we should emit tied or untied task.
2926   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2927   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2928     CGF.EmitStmt(CS->getCapturedStmt());
2929   };
2930   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2931                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2932                             const OMPTaskDataTy &Data) {
2933     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2934                                             SharedsTy, CapturedStruct, IfCond,
2935                                             Data);
2936   };
2937   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2938 }
2939 
2940 void CodeGenFunction::EmitOMPTaskyieldDirective(
2941     const OMPTaskyieldDirective &S) {
2942   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2943 }
2944 
2945 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2946   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2947 }
2948 
2949 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2950   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2951 }
2952 
2953 void CodeGenFunction::EmitOMPTaskgroupDirective(
2954     const OMPTaskgroupDirective &S) {
2955   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2956     Action.Enter(CGF);
2957     if (const Expr *E = S.getReductionRef()) {
2958       SmallVector<const Expr *, 4> LHSs;
2959       SmallVector<const Expr *, 4> RHSs;
2960       OMPTaskDataTy Data;
2961       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2962         auto IPriv = C->privates().begin();
2963         auto IRed = C->reduction_ops().begin();
2964         auto ILHS = C->lhs_exprs().begin();
2965         auto IRHS = C->rhs_exprs().begin();
2966         for (const auto *Ref : C->varlists()) {
2967           Data.ReductionVars.emplace_back(Ref);
2968           Data.ReductionCopies.emplace_back(*IPriv);
2969           Data.ReductionOps.emplace_back(*IRed);
2970           LHSs.emplace_back(*ILHS);
2971           RHSs.emplace_back(*IRHS);
2972           std::advance(IPriv, 1);
2973           std::advance(IRed, 1);
2974           std::advance(ILHS, 1);
2975           std::advance(IRHS, 1);
2976         }
2977       }
2978       llvm::Value *ReductionDesc =
2979           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
2980                                                            LHSs, RHSs, Data);
2981       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2982       CGF.EmitVarDecl(*VD);
2983       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
2984                             /*Volatile=*/false, E->getType());
2985     }
2986     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2987   };
2988   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2989   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2990 }
2991 
2992 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2993   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2994     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2995       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2996                                 FlushClause->varlist_end());
2997     }
2998     return llvm::None;
2999   }(), S.getLocStart());
3000 }
3001 
/// Emit the loop of a 'distribute'-based OpenMP loop directive.
///
/// Emits the iteration variable and, when it is a variable, the
/// last-iteration counter; checks the loop precondition; privatizes the
/// clause variables and loop counters; and then emits either a statically
/// scheduled non-chunked loop or an outer loop that obtains its chunks from
/// the runtime. Lastprivate variables are copied back after the loop when a
/// lastprivate clause was initialized.
///
/// \param S The loop directive being emitted.
/// \param CodeGenLoop Callback that emits the code executed on each iteration
/// of the inner loop; it is also forwarded to the outer-loop emission.
/// \param IncExpr Increment expression handed to the inner loop emission.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Set while the lastprivate clause is initialized below; decides whether the
  // final copy of the lastprivate variables is emitted after the loop.
  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant, in which case no
    // continuation block is needed.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.

      // Directives for which isOpenMPLoopBoundSharingDirective() holds use the
      // combined lower/upper bound variables; all others use the plain ones.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // Evaluate the chunk size and convert it to the type of the
          // iteration variable.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress());
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();

        // for distribute alone,  codegen
        // while (idx <= UB) { BODY; ++idx; }
        // when combined with 'for' (e.g. as in 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }

      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(
                EmitLoadOfScalar(IL, S.getLocStart())));
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
3157 
3158 void CodeGenFunction::EmitOMPDistributeDirective(
3159     const OMPDistributeDirective &S) {
3160   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3161 
3162     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3163   };
3164   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3165   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3166                                               false);
3167 }
3168 
3169 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3170                                                    const CapturedStmt *S) {
3171   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3172   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3173   CGF.CapturedStmtInfo = &CapStmtInfo;
3174   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3175   Fn->addFnAttr(llvm::Attribute::NoInline);
3176   return Fn;
3177 }
3178 
3179 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3180   if (!S.getAssociatedStmt()) {
3181     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3182       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3183     return;
3184   }
3185   auto *C = S.getSingleClause<OMPSIMDClause>();
3186   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3187                                  PrePostActionTy &Action) {
3188     if (C) {
3189       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3190       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3191       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3192       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3193       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3194                                                       OutlinedFn, CapturedVars);
3195     } else {
3196       Action.Enter(CGF);
3197       CGF.EmitStmt(
3198           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3199     }
3200   };
3201   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3202   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3203 }
3204 
3205 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3206                                          QualType SrcType, QualType DestType,
3207                                          SourceLocation Loc) {
3208   assert(CGF.hasScalarEvaluationKind(DestType) &&
3209          "DestType must have scalar evaluation kind.");
3210   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3211   return Val.isScalar()
3212              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3213                                         Loc)
3214              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3215                                                  DestType, Loc);
3216 }
3217 
3218 static CodeGenFunction::ComplexPairTy
3219 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3220                       QualType DestType, SourceLocation Loc) {
3221   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3222          "DestType must have complex evaluation kind.");
3223   CodeGenFunction::ComplexPairTy ComplexVal;
3224   if (Val.isScalar()) {
3225     // Convert the input element to the element type of the complex.
3226     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3227     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3228                                               DestElementType, Loc);
3229     ComplexVal = CodeGenFunction::ComplexPairTy(
3230         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3231   } else {
3232     assert(Val.isComplex() && "Must be a scalar or complex.");
3233     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3234     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3235     ComplexVal.first = CGF.EmitScalarConversion(
3236         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3237     ComplexVal.second = CGF.EmitScalarConversion(
3238         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3239   }
3240   return ComplexVal;
3241 }
3242 
3243 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3244                                   LValue LVal, RValue RVal) {
3245   if (LVal.isGlobalReg()) {
3246     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3247   } else {
3248     CGF.EmitAtomicStore(RVal, LVal,
3249                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3250                                  : llvm::AtomicOrdering::Monotonic,
3251                         LVal.isVolatile(), /*IsInit=*/false);
3252   }
3253 }
3254 
3255 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3256                                          QualType RValTy, SourceLocation Loc) {
3257   switch (getEvaluationKind(LVal.getType())) {
3258   case TEK_Scalar:
3259     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3260                                *this, RVal, RValTy, LVal.getType(), Loc)),
3261                            LVal);
3262     break;
3263   case TEK_Complex:
3264     EmitStoreOfComplex(
3265         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3266         /*isInit=*/false);
3267     break;
3268   case TEK_Aggregate:
3269     llvm_unreachable("Must be a scalar or complex.");
3270   }
3271 }
3272 
3273 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3274                                   const Expr *X, const Expr *V,
3275                                   SourceLocation Loc) {
3276   // v = x;
3277   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3278   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3279   LValue XLValue = CGF.EmitLValue(X);
3280   LValue VLValue = CGF.EmitLValue(V);
3281   RValue Res = XLValue.isGlobalReg()
3282                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3283                    : CGF.EmitAtomicLoad(
3284                          XLValue, Loc,
3285                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3286                                   : llvm::AtomicOrdering::Monotonic,
3287                          XLValue.isVolatile());
3288   // OpenMP, 2.12.6, atomic Construct
3289   // Any atomic construct with a seq_cst clause forces the atomically
3290   // performed operation to include an implicit flush operation without a
3291   // list.
3292   if (IsSeqCst)
3293     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3294   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3295 }
3296 
3297 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3298                                    const Expr *X, const Expr *E,
3299                                    SourceLocation Loc) {
3300   // x = expr;
3301   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3302   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3303   // OpenMP, 2.12.6, atomic Construct
3304   // Any atomic construct with a seq_cst clause forces the atomically
3305   // performed operation to include an implicit flush operation without a
3306   // list.
3307   if (IsSeqCst)
3308     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3309 }
3310 
3311 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3312                                                 RValue Update,
3313                                                 BinaryOperatorKind BO,
3314                                                 llvm::AtomicOrdering AO,
3315                                                 bool IsXLHSInRHSPart) {
3316   auto &Context = CGF.CGM.getContext();
3317   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3318   // expression is simple and atomic is allowed for the given type for the
3319   // target platform.
3320   if (BO == BO_Comma || !Update.isScalar() ||
3321       !Update.getScalarVal()->getType()->isIntegerTy() ||
3322       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3323                         (Update.getScalarVal()->getType() !=
3324                          X.getAddress().getElementType())) ||
3325       !X.getAddress().getElementType()->isIntegerTy() ||
3326       !Context.getTargetInfo().hasBuiltinAtomic(
3327           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3328     return std::make_pair(false, RValue::get(nullptr));
3329 
3330   llvm::AtomicRMWInst::BinOp RMWOp;
3331   switch (BO) {
3332   case BO_Add:
3333     RMWOp = llvm::AtomicRMWInst::Add;
3334     break;
3335   case BO_Sub:
3336     if (!IsXLHSInRHSPart)
3337       return std::make_pair(false, RValue::get(nullptr));
3338     RMWOp = llvm::AtomicRMWInst::Sub;
3339     break;
3340   case BO_And:
3341     RMWOp = llvm::AtomicRMWInst::And;
3342     break;
3343   case BO_Or:
3344     RMWOp = llvm::AtomicRMWInst::Or;
3345     break;
3346   case BO_Xor:
3347     RMWOp = llvm::AtomicRMWInst::Xor;
3348     break;
3349   case BO_LT:
3350     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3351                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3352                                    : llvm::AtomicRMWInst::Max)
3353                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3354                                    : llvm::AtomicRMWInst::UMax);
3355     break;
3356   case BO_GT:
3357     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3358                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3359                                    : llvm::AtomicRMWInst::Min)
3360                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3361                                    : llvm::AtomicRMWInst::UMin);
3362     break;
3363   case BO_Assign:
3364     RMWOp = llvm::AtomicRMWInst::Xchg;
3365     break;
3366   case BO_Mul:
3367   case BO_Div:
3368   case BO_Rem:
3369   case BO_Shl:
3370   case BO_Shr:
3371   case BO_LAnd:
3372   case BO_LOr:
3373     return std::make_pair(false, RValue::get(nullptr));
3374   case BO_PtrMemD:
3375   case BO_PtrMemI:
3376   case BO_LE:
3377   case BO_GE:
3378   case BO_EQ:
3379   case BO_NE:
3380   case BO_AddAssign:
3381   case BO_SubAssign:
3382   case BO_AndAssign:
3383   case BO_OrAssign:
3384   case BO_XorAssign:
3385   case BO_MulAssign:
3386   case BO_DivAssign:
3387   case BO_RemAssign:
3388   case BO_ShlAssign:
3389   case BO_ShrAssign:
3390   case BO_Comma:
3391     llvm_unreachable("Unsupported atomic update operation");
3392   }
3393   auto *UpdateVal = Update.getScalarVal();
3394   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3395     UpdateVal = CGF.Builder.CreateIntCast(
3396         IC, X.getAddress().getElementType(),
3397         X.getType()->hasSignedIntegerRepresentation());
3398   }
3399   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3400   return std::make_pair(true, RValue::get(Res));
3401 }
3402 
3403 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3404     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3405     llvm::AtomicOrdering AO, SourceLocation Loc,
3406     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3407   // Update expressions are allowed to have the following forms:
3408   // x binop= expr; -> xrval + expr;
3409   // x++, ++x -> xrval + 1;
3410   // x--, --x -> xrval - 1;
3411   // x = x binop expr; -> xrval binop expr
3412   // x = expr Op x; - > expr binop xrval;
3413   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3414   if (!Res.first) {
3415     if (X.isGlobalReg()) {
3416       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3417       // 'xrval'.
3418       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3419     } else {
3420       // Perform compare-and-swap procedure.
3421       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3422     }
3423   }
3424   return Res;
3425 }
3426 
3427 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3428                                     const Expr *X, const Expr *E,
3429                                     const Expr *UE, bool IsXLHSInRHSPart,
3430                                     SourceLocation Loc) {
3431   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3432          "Update expr in 'atomic update' must be a binary operator.");
3433   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3434   // Update expressions are allowed to have the following forms:
3435   // x binop= expr; -> xrval + expr;
3436   // x++, ++x -> xrval + 1;
3437   // x--, --x -> xrval - 1;
3438   // x = x binop expr; -> xrval binop expr
3439   // x = expr Op x; - > expr binop xrval;
3440   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3441   LValue XLValue = CGF.EmitLValue(X);
3442   RValue ExprRValue = CGF.EmitAnyExpr(E);
3443   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3444                      : llvm::AtomicOrdering::Monotonic;
3445   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3446   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3447   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3448   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3449   auto Gen =
3450       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3451         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3452         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3453         return CGF.EmitAnyExpr(UE);
3454       };
3455   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3456       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3457   // OpenMP, 2.12.6, atomic Construct
3458   // Any atomic construct with a seq_cst clause forces the atomically
3459   // performed operation to include an implicit flush operation without a
3460   // list.
3461   if (IsSeqCst)
3462     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3463 }
3464 
3465 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3466                             QualType SourceType, QualType ResType,
3467                             SourceLocation Loc) {
3468   switch (CGF.getEvaluationKind(ResType)) {
3469   case TEK_Scalar:
3470     return RValue::get(
3471         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3472   case TEK_Complex: {
3473     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3474     return RValue::getComplex(Res.first, Res.second);
3475   }
3476   case TEK_Aggregate:
3477     break;
3478   }
3479   llvm_unreachable("Must be a scalar or complex.");
3480 }
3481 
/// Emits code for the 'capture' form of '#pragma omp atomic': 'x' is updated
/// (or overwritten) atomically and a value of 'x' is then stored into 'v'.
/// \param IsSeqCst Selects sequentially consistent ordering (monotonic
/// otherwise) and causes a flush to be emitted at the end.
/// \param IsPostfixUpdate When true, 'v' receives the value 'x' had before the
/// update; when false and \p UE is present, 'v' receives the updated value.
/// \param V Lvalue expression that receives the captured value.
/// \param X Lvalue expression that is updated.
/// \param E Expression combined with, or assigned to, 'x'.
/// \param UE Update expression whose operands are opaque values for 'x' and
/// 'expr'; null when 'x' is simply overwritten with 'expr'.
/// \param IsXLHSInRHSPart True if the opaque value for 'x' is the left operand
/// of \p UE.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value that is stored into 'v' at the end; filled in by one of the paths
  // below.
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // Type of NewVVal, passed to emitOMPSimpleStore together with the value.
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Generator for the non-'atomicrmw' paths: evaluates the update expression
    // on the given value of 'x' and records the value to capture (old value
    // for postfix updates, new value otherwise) in NewVVal.
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    // Convert 'expr' to the (non-reference) type of 'x' before storing it.
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // Generator for the non-'atomicrmw' paths: captures the value of 'x' it
    // is given and yields the converted 'expr' as the new value of 'x'.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3562 
3563 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3564                               bool IsSeqCst, bool IsPostfixUpdate,
3565                               const Expr *X, const Expr *V, const Expr *E,
3566                               const Expr *UE, bool IsXLHSInRHSPart,
3567                               SourceLocation Loc) {
3568   switch (Kind) {
3569   case OMPC_read:
3570     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3571     break;
3572   case OMPC_write:
3573     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3574     break;
3575   case OMPC_unknown:
3576   case OMPC_update:
3577     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3578     break;
3579   case OMPC_capture:
3580     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3581                              IsXLHSInRHSPart, Loc);
3582     break;
3583   case OMPC_if:
3584   case OMPC_final:
3585   case OMPC_num_threads:
3586   case OMPC_private:
3587   case OMPC_firstprivate:
3588   case OMPC_lastprivate:
3589   case OMPC_reduction:
3590   case OMPC_task_reduction:
3591   case OMPC_in_reduction:
3592   case OMPC_safelen:
3593   case OMPC_simdlen:
3594   case OMPC_collapse:
3595   case OMPC_default:
3596   case OMPC_seq_cst:
3597   case OMPC_shared:
3598   case OMPC_linear:
3599   case OMPC_aligned:
3600   case OMPC_copyin:
3601   case OMPC_copyprivate:
3602   case OMPC_flush:
3603   case OMPC_proc_bind:
3604   case OMPC_schedule:
3605   case OMPC_ordered:
3606   case OMPC_nowait:
3607   case OMPC_untied:
3608   case OMPC_threadprivate:
3609   case OMPC_depend:
3610   case OMPC_mergeable:
3611   case OMPC_device:
3612   case OMPC_threads:
3613   case OMPC_simd:
3614   case OMPC_map:
3615   case OMPC_num_teams:
3616   case OMPC_thread_limit:
3617   case OMPC_priority:
3618   case OMPC_grainsize:
3619   case OMPC_nogroup:
3620   case OMPC_num_tasks:
3621   case OMPC_hint:
3622   case OMPC_dist_schedule:
3623   case OMPC_defaultmap:
3624   case OMPC_uniform:
3625   case OMPC_to:
3626   case OMPC_from:
3627   case OMPC_use_device_ptr:
3628   case OMPC_is_device_ptr:
3629     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3630   }
3631 }
3632 
3633 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3634   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3635   OpenMPClauseKind Kind = OMPC_unknown;
3636   for (auto *C : S.clauses()) {
3637     // Find first clause (skip seq_cst clause, if it is first).
3638     if (C->getClauseKind() != OMPC_seq_cst) {
3639       Kind = C->getClauseKind();
3640       break;
3641     }
3642   }
3643 
3644   const auto *CS =
3645       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3646   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3647     enterFullExpression(EWC);
3648   }
3649   // Processing for statements under 'atomic capture'.
3650   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3651     for (const auto *C : Compound->body()) {
3652       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3653         enterFullExpression(EWC);
3654       }
3655     }
3656   }
3657 
3658   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3659                                             PrePostActionTy &) {
3660     CGF.EmitStopPoint(CS);
3661     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3662                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3663                       S.isXLHSInRHSPart(), S.getLocStart());
3664   };
3665   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3666   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3667 }
3668 
3669 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3670                                          const OMPExecutableDirective &S,
3671                                          const RegionCodeGenTy &CodeGen) {
3672   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3673   CodeGenModule &CGM = CGF.CGM;
3674   const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target);
3675 
3676   llvm::Function *Fn = nullptr;
3677   llvm::Constant *FnID = nullptr;
3678 
3679   const Expr *IfCond = nullptr;
3680   // Check for the at most one if clause associated with the target region.
3681   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3682     if (C->getNameModifier() == OMPD_unknown ||
3683         C->getNameModifier() == OMPD_target) {
3684       IfCond = C->getCondition();
3685       break;
3686     }
3687   }
3688 
3689   // Check if we have any device clause associated with the directive.
3690   const Expr *Device = nullptr;
3691   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3692     Device = C->getDevice();
3693   }
3694 
3695   // Check if we have an if clause whose conditional always evaluates to false
3696   // or if we do not have any targets specified. If so the target region is not
3697   // an offload entry point.
3698   bool IsOffloadEntry = true;
3699   if (IfCond) {
3700     bool Val;
3701     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3702       IsOffloadEntry = false;
3703   }
3704   if (CGM.getLangOpts().OMPTargetTriples.empty())
3705     IsOffloadEntry = false;
3706 
3707   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3708   StringRef ParentName;
3709   // In case we have Ctors/Dtors we use the complete type variant to produce
3710   // the mangling of the device outlined kernel.
3711   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3712     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3713   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3714     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3715   else
3716     ParentName =
3717         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3718 
3719   // Emit target region as a standalone region.
3720   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3721                                                     IsOffloadEntry, CodeGen);
3722   OMPLexicalScope Scope(CGF, S);
3723   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3724   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3725   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3726                                         CapturedVars);
3727 }
3728 
3729 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3730                              PrePostActionTy &Action) {
3731   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3732   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3733   CGF.EmitOMPPrivateClause(S, PrivateScope);
3734   (void)PrivateScope.Privatize();
3735 
3736   Action.Enter(CGF);
3737   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3738 }
3739 
3740 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3741                                                   StringRef ParentName,
3742                                                   const OMPTargetDirective &S) {
3743   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3744     emitTargetRegion(CGF, S, Action);
3745   };
3746   llvm::Function *Fn;
3747   llvm::Constant *Addr;
3748   // Emit target region as a standalone region.
3749   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3750       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3751   assert(Fn && Addr && "Target device function emission failed.");
3752 }
3753 
3754 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3755   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3756     emitTargetRegion(CGF, S, Action);
3757   };
3758   emitCommonOMPTargetDirective(*this, S, CodeGen);
3759 }
3760 
3761 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3762                                         const OMPExecutableDirective &S,
3763                                         OpenMPDirectiveKind InnermostKind,
3764                                         const RegionCodeGenTy &CodeGen) {
3765   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3766   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3767       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3768 
3769   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3770   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3771   if (NT || TL) {
3772     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3773     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3774 
3775     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3776                                                   S.getLocStart());
3777   }
3778 
3779   OMPTeamsScope Scope(CGF, S);
3780   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3781   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3782   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3783                                            CapturedVars);
3784 }
3785 
3786 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3787   // Emit teams region as a standalone region.
3788   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3789     OMPPrivateScope PrivateScope(CGF);
3790     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3791     CGF.EmitOMPPrivateClause(S, PrivateScope);
3792     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3793     (void)PrivateScope.Privatize();
3794     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3795     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3796   };
3797   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3798   emitPostUpdateForReductionClause(
3799       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3800 }
3801 
3802 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3803                                   const OMPTargetTeamsDirective &S) {
3804   auto *CS = S.getCapturedStmt(OMPD_teams);
3805   Action.Enter(CGF);
3806   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3807     // TODO: Add support for clauses.
3808     CGF.EmitStmt(CS->getCapturedStmt());
3809   };
3810   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3811 }
3812 
3813 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3814     CodeGenModule &CGM, StringRef ParentName,
3815     const OMPTargetTeamsDirective &S) {
3816   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3817     emitTargetTeamsRegion(CGF, Action, S);
3818   };
3819   llvm::Function *Fn;
3820   llvm::Constant *Addr;
3821   // Emit target region as a standalone region.
3822   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3823       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3824   assert(Fn && Addr && "Target device function emission failed.");
3825 }
3826 
3827 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3828     const OMPTargetTeamsDirective &S) {
3829   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3830     emitTargetTeamsRegion(CGF, Action, S);
3831   };
3832   emitCommonOMPTargetDirective(*this, S, CodeGen);
3833 }
3834 
3835 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
3836     const OMPTeamsDistributeDirective &S) {
3837 
3838   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3839     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3840   };
3841 
3842   // Emit teams region as a standalone region.
3843   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
3844                                             PrePostActionTy &) {
3845     OMPPrivateScope PrivateScope(CGF);
3846     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3847     (void)PrivateScope.Privatize();
3848     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
3849                                                     CodeGenDistribute);
3850     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3851   };
3852   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3853   emitPostUpdateForReductionClause(*this, S,
3854                                    [](CodeGenFunction &) { return nullptr; });
3855 }
3856 
3857 void CodeGenFunction::EmitOMPCancellationPointDirective(
3858     const OMPCancellationPointDirective &S) {
3859   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3860                                                    S.getCancelRegion());
3861 }
3862 
3863 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3864   const Expr *IfCond = nullptr;
3865   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3866     if (C->getNameModifier() == OMPD_unknown ||
3867         C->getNameModifier() == OMPD_cancel) {
3868       IfCond = C->getCondition();
3869       break;
3870     }
3871   }
3872   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3873                                         S.getCancelRegion());
3874 }
3875 
3876 CodeGenFunction::JumpDest
3877 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3878   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3879       Kind == OMPD_target_parallel)
3880     return ReturnBlock;
3881   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3882          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3883          Kind == OMPD_distribute_parallel_for ||
3884          Kind == OMPD_target_parallel_for);
3885   return OMPCancelStack.getExitBlock();
3886 }
3887 
3888 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3889     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3890     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3891   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3892   auto OrigVarIt = C.varlist_begin();
3893   auto InitIt = C.inits().begin();
3894   for (auto PvtVarIt : C.private_copies()) {
3895     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3896     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3897     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3898 
3899     // In order to identify the right initializer we need to match the
3900     // declaration used by the mapping logic. In some cases we may get
3901     // OMPCapturedExprDecl that refers to the original declaration.
3902     const ValueDecl *MatchingVD = OrigVD;
3903     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3904       // OMPCapturedExprDecl are used to privative fields of the current
3905       // structure.
3906       auto *ME = cast<MemberExpr>(OED->getInit());
3907       assert(isa<CXXThisExpr>(ME->getBase()) &&
3908              "Base should be the current struct!");
3909       MatchingVD = ME->getMemberDecl();
3910     }
3911 
3912     // If we don't have information about the current list item, move on to
3913     // the next one.
3914     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3915     if (InitAddrIt == CaptureDeviceAddrMap.end())
3916       continue;
3917 
3918     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3919       // Initialize the temporary initialization variable with the address we
3920       // get from the runtime library. We have to cast the source address
3921       // because it is always a void *. References are materialized in the
3922       // privatization scope, so the initialization here disregards the fact
3923       // the original variable is a reference.
3924       QualType AddrQTy =
3925           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3926       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3927       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3928       setAddrOfLocalVar(InitVD, InitAddr);
3929 
3930       // Emit private declaration, it will be initialized by the value we
3931       // declaration we just added to the local declarations map.
3932       EmitDecl(*PvtVD);
3933 
3934       // The initialization variables reached its purpose in the emission
3935       // ofthe previous declaration, so we don't need it anymore.
3936       LocalDeclMap.erase(InitVD);
3937 
3938       // Return the address of the private variable.
3939       return GetAddrOfLocalVar(PvtVD);
3940     });
3941     assert(IsRegistered && "firstprivate var already registered as private");
3942     // Silence the warning about unused variable.
3943     (void)IsRegistered;
3944 
3945     ++OrigVarIt;
3946     ++InitIt;
3947   }
3948 }
3949 
/// Generate the instructions for '#pragma omp target data' directive.
///
/// The region body is emitted as an inlined 'target data' directive. When
/// offload targets are configured, the data mapping calls are emitted through
/// the OpenMP runtime around the region, and the list items of
/// 'use_device_ptr' clauses are privatized if the pre-action installed here
/// is invoked. Without offload targets only the region body is emitted.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  // Pre-action whose only effect is to set the referenced flag when entered.
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  // Region generator: wraps the body in the privatization-selecting codegen
  // and emits it as an inlined 'target data' directive.
  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emits the statement captured by the directive.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      // Reset the flag; it is only set again if the action below sets it.
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
4039 
4040 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4041     const OMPTargetEnterDataDirective &S) {
4042   // If we don't have target devices, don't bother emitting the data mapping
4043   // code.
4044   if (CGM.getLangOpts().OMPTargetTriples.empty())
4045     return;
4046 
4047   // Check if we have any if clause associated with the directive.
4048   const Expr *IfCond = nullptr;
4049   if (auto *C = S.getSingleClause<OMPIfClause>())
4050     IfCond = C->getCondition();
4051 
4052   // Check if we have any device clause associated with the directive.
4053   const Expr *Device = nullptr;
4054   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4055     Device = C->getDevice();
4056 
4057   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4058 }
4059 
4060 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4061     const OMPTargetExitDataDirective &S) {
4062   // If we don't have target devices, don't bother emitting the data mapping
4063   // code.
4064   if (CGM.getLangOpts().OMPTargetTriples.empty())
4065     return;
4066 
4067   // Check if we have any if clause associated with the directive.
4068   const Expr *IfCond = nullptr;
4069   if (auto *C = S.getSingleClause<OMPIfClause>())
4070     IfCond = C->getCondition();
4071 
4072   // Check if we have any device clause associated with the directive.
4073   const Expr *Device = nullptr;
4074   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4075     Device = C->getDevice();
4076 
4077   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4078 }
4079 
4080 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4081                                      const OMPTargetParallelDirective &S,
4082                                      PrePostActionTy &Action) {
4083   // Get the captured statement associated with the 'parallel' region.
4084   auto *CS = S.getCapturedStmt(OMPD_parallel);
4085   Action.Enter(CGF);
4086   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4087     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4088     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4089     CGF.EmitOMPPrivateClause(S, PrivateScope);
4090     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4091     (void)PrivateScope.Privatize();
4092     // TODO: Add support for clauses.
4093     CGF.EmitStmt(CS->getCapturedStmt());
4094     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4095   };
4096   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4097                                  emitEmptyBoundParameters);
4098   emitPostUpdateForReductionClause(
4099       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4100 }
4101 
4102 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4103     CodeGenModule &CGM, StringRef ParentName,
4104     const OMPTargetParallelDirective &S) {
4105   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4106     emitTargetParallelRegion(CGF, S, Action);
4107   };
4108   llvm::Function *Fn;
4109   llvm::Constant *Addr;
4110   // Emit target region as a standalone region.
4111   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4112       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4113   assert(Fn && Addr && "Target device function emission failed.");
4114 }
4115 
4116 void CodeGenFunction::EmitOMPTargetParallelDirective(
4117     const OMPTargetParallelDirective &S) {
4118   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4119     emitTargetParallelRegion(CGF, S, Action);
4120   };
4121   emitCommonOMPTargetDirective(*this, S, CodeGen);
4122 }
4123 
4124 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
4125                                         const OMPTargetParallelForDirective &S,
4126                                         PrePostActionTy &Action) {
4127   Action.Enter(CGF);
4128   // Emit directive as a combined directive that consists of two implicit
4129   // directives: 'parallel' with 'for' directive.
4130   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4131     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4132                                emitDispatchForLoopBounds);
4133   };
4134   emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
4135                                  emitEmptyBoundParameters);
4136 }
4137 
4138 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4139     CodeGenModule &CGM, StringRef ParentName,
4140     const OMPTargetParallelForDirective &S) {
4141   // Emit SPMD target parallel for region as a standalone region.
4142   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4143     emitTargetParallelForRegion(CGF, S, Action);
4144   };
4145   llvm::Function *Fn;
4146   llvm::Constant *Addr;
4147   // Emit target region as a standalone region.
4148   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4149       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4150   assert(Fn && Addr && "Target device function emission failed.");
4151 }
4152 
4153 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4154     const OMPTargetParallelForDirective &S) {
4155   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4156     emitTargetParallelForRegion(CGF, S, Action);
4157   };
4158   emitCommonOMPTargetDirective(*this, S, CodeGen);
4159 }
4160 
4161 static void
4162 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
4163                                 const OMPTargetParallelForSimdDirective &S,
4164                                 PrePostActionTy &Action) {
4165   Action.Enter(CGF);
4166   // Emit directive as a combined directive that consists of two implicit
4167   // directives: 'parallel' with 'for' directive.
4168   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4169     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
4170                                emitDispatchForLoopBounds);
4171   };
4172   emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
4173                                  emitEmptyBoundParameters);
4174 }
4175 
4176 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4177     CodeGenModule &CGM, StringRef ParentName,
4178     const OMPTargetParallelForSimdDirective &S) {
4179   // Emit SPMD target parallel for region as a standalone region.
4180   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4181     emitTargetParallelForSimdRegion(CGF, S, Action);
4182   };
4183   llvm::Function *Fn;
4184   llvm::Constant *Addr;
4185   // Emit target region as a standalone region.
4186   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4187       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4188   assert(Fn && Addr && "Target device function emission failed.");
4189 }
4190 
4191 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4192     const OMPTargetParallelForSimdDirective &S) {
4193   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4194     emitTargetParallelForSimdRegion(CGF, S, Action);
4195   };
4196   emitCommonOMPTargetDirective(*this, S, CodeGen);
4197 }
4198 
4199 /// Emit a helper variable and return corresponding lvalue.
4200 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4201                      const ImplicitParamDecl *PVD,
4202                      CodeGenFunction::OMPPrivateScope &Privates) {
4203   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4204   Privates.addPrivate(
4205       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4206 }
4207 
4208 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4209   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4210   // Emit outlined function for task construct.
4211   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
4212   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4213   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4214   const Expr *IfCond = nullptr;
4215   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4216     if (C->getNameModifier() == OMPD_unknown ||
4217         C->getNameModifier() == OMPD_taskloop) {
4218       IfCond = C->getCondition();
4219       break;
4220     }
4221   }
4222 
4223   OMPTaskDataTy Data;
4224   // Check if taskloop must be emitted without taskgroup.
4225   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4226   // TODO: Check if we should emit tied or untied task.
4227   Data.Tied = true;
4228   // Set scheduling for taskloop
4229   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4230     // grainsize clause
4231     Data.Schedule.setInt(/*IntVal=*/false);
4232     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4233   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4234     // num_tasks clause
4235     Data.Schedule.setInt(/*IntVal=*/true);
4236     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4237   }
4238 
4239   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4240     // if (PreCond) {
4241     //   for (IV in 0..LastIteration) BODY;
4242     //   <Final counter/linear vars updates>;
4243     // }
4244     //
4245 
4246     // Emit: if (PreCond) - begin.
4247     // If the condition constant folds and can be elided, avoid emitting the
4248     // whole loop.
4249     bool CondConstant;
4250     llvm::BasicBlock *ContBlock = nullptr;
4251     OMPLoopScope PreInitScope(CGF, S);
4252     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4253       if (!CondConstant)
4254         return;
4255     } else {
4256       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4257       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4258       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4259                   CGF.getProfileCount(&S));
4260       CGF.EmitBlock(ThenBlock);
4261       CGF.incrementProfileCounter(&S);
4262     }
4263 
4264     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4265       CGF.EmitOMPSimdInit(S);
4266 
4267     OMPPrivateScope LoopScope(CGF);
4268     // Emit helper vars inits.
4269     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4270     auto *I = CS->getCapturedDecl()->param_begin();
4271     auto *LBP = std::next(I, LowerBound);
4272     auto *UBP = std::next(I, UpperBound);
4273     auto *STP = std::next(I, Stride);
4274     auto *LIP = std::next(I, LastIter);
4275     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4276              LoopScope);
4277     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4278              LoopScope);
4279     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4280     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4281              LoopScope);
4282     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4283     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4284     (void)LoopScope.Privatize();
4285     // Emit the loop iteration variable.
4286     const Expr *IVExpr = S.getIterationVariable();
4287     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4288     CGF.EmitVarDecl(*IVDecl);
4289     CGF.EmitIgnoredExpr(S.getInit());
4290 
4291     // Emit the iterations count variable.
4292     // If it is not a variable, Sema decided to calculate iterations count on
4293     // each iteration (e.g., it is foldable into a constant).
4294     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4295       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4296       // Emit calculation of the iterations count.
4297       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4298     }
4299 
4300     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4301                          S.getInc(),
4302                          [&S](CodeGenFunction &CGF) {
4303                            CGF.EmitOMPLoopBody(S, JumpDest());
4304                            CGF.EmitStopPoint(&S);
4305                          },
4306                          [](CodeGenFunction &) {});
4307     // Emit: if (PreCond) - end.
4308     if (ContBlock) {
4309       CGF.EmitBranch(ContBlock);
4310       CGF.EmitBlock(ContBlock, true);
4311     }
4312     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4313     if (HasLastprivateClause) {
4314       CGF.EmitOMPLastprivateClauseFinal(
4315           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4316           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4317               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4318               (*LIP)->getType(), S.getLocStart())));
4319     }
4320   };
4321   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4322                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4323                             const OMPTaskDataTy &Data) {
4324     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4325       OMPLoopScope PreInitScope(CGF, S);
4326       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4327                                                   OutlinedFn, SharedsTy,
4328                                                   CapturedStruct, IfCond, Data);
4329     };
4330     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4331                                                     CodeGen);
4332   };
4333   if (Data.Nogroup)
4334     EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4335   else {
4336     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4337         *this,
4338         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4339                                         PrePostActionTy &Action) {
4340           Action.Enter(CGF);
4341           CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4342         },
4343         S.getLocStart());
4344   }
4345 }
4346 
4347 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4348   EmitOMPTaskLoopBasedDirective(S);
4349 }
4350 
4351 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4352     const OMPTaskLoopSimdDirective &S) {
4353   EmitOMPTaskLoopBasedDirective(S);
4354 }
4355 
4356 // Generate the instructions for '#pragma omp target update' directive.
4357 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4358     const OMPTargetUpdateDirective &S) {
4359   // If we don't have target devices, don't bother emitting the data mapping
4360   // code.
4361   if (CGM.getLangOpts().OMPTargetTriples.empty())
4362     return;
4363 
4364   // Check if we have any if clause associated with the directive.
4365   const Expr *IfCond = nullptr;
4366   if (auto *C = S.getSingleClause<OMPIfClause>())
4367     IfCond = C->getCondition();
4368 
4369   // Check if we have any device clause associated with the directive.
4370   const Expr *Device = nullptr;
4371   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4372     Device = C->getDevice();
4373 
4374   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4375 }
4376