1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
69                             isCapturedVar(CGF, VD) ||
70                                 (CGF.CapturedStmtInfo &&
71                                  InlinedShareds.isGlobalVarCaptured(VD)),
72                             VD->getType().getNonReferenceType(), VK_LValue,
73                             SourceLocation());
74             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
75               return CGF.EmitLValue(&DRE).getAddress();
76             });
77           }
78         }
79         (void)InlinedShareds.Privatize();
80       }
81     }
82   }
83 };
84 
85 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
86 /// for captured expressions.
87 class OMPParallelScope final : public OMPLexicalScope {
88   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
89     OpenMPDirectiveKind Kind = S.getDirectiveKind();
90     return !(isOpenMPTargetExecutionDirective(Kind) ||
91              isOpenMPLoopBoundSharingDirective(Kind)) &&
92            isOpenMPParallelDirective(Kind);
93   }
94 
95 public:
96   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
97       : OMPLexicalScope(CGF, S,
98                         /*AsInlined=*/false,
99                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
100 };
101 
102 /// Lexical scope for OpenMP teams construct, that handles correct codegen
103 /// for captured expressions.
104 class OMPTeamsScope final : public OMPLexicalScope {
105   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
106     OpenMPDirectiveKind Kind = S.getDirectiveKind();
107     return !isOpenMPTargetExecutionDirective(Kind) &&
108            isOpenMPTeamsDirective(Kind);
109   }
110 
111 public:
112   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
113       : OMPLexicalScope(CGF, S,
114                         /*AsInlined=*/false,
115                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
116 };
117 
118 /// Private scope for OpenMP loop-based directives, that supports capturing
119 /// of used expression from loop statement.
120 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
121   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
122     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
123       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
124         for (const auto *I : PreInits->decls())
125           CGF.EmitVarDecl(cast<VarDecl>(*I));
126       }
127     }
128   }
129 
130 public:
131   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
132       : CodeGenFunction::RunCleanupsScope(CGF) {
133     emitPreInitStmt(CGF, S);
134   }
135 };
136 
137 } // namespace
138 
139 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
140   auto &C = getContext();
141   llvm::Value *Size = nullptr;
142   auto SizeInChars = C.getTypeSizeInChars(Ty);
143   if (SizeInChars.isZero()) {
144     // getTypeSizeInChars() returns 0 for a VLA.
145     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
146       llvm::Value *ArraySize;
147       std::tie(ArraySize, Ty) = getVLASize(VAT);
148       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
149     }
150     SizeInChars = C.getTypeSizeInChars(Ty);
151     if (SizeInChars.isZero())
152       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
153     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
154   } else
155     Size = CGM.getSize(SizeInChars);
156   return Size;
157 }
158 
159 void CodeGenFunction::GenerateOpenMPCapturedVars(
160     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
161   const RecordDecl *RD = S.getCapturedRecordDecl();
162   auto CurField = RD->field_begin();
163   auto CurCap = S.captures().begin();
164   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
165                                                  E = S.capture_init_end();
166        I != E; ++I, ++CurField, ++CurCap) {
167     if (CurField->hasCapturedVLAType()) {
168       auto VAT = CurField->getCapturedVLAType();
169       auto *Val = VLASizeMap[VAT->getSizeExpr()];
170       CapturedVars.push_back(Val);
171     } else if (CurCap->capturesThis())
172       CapturedVars.push_back(CXXThisValue);
173     else if (CurCap->capturesVariableByCopy()) {
174       llvm::Value *CV =
175           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
176 
177       // If the field is not a pointer, we need to save the actual value
178       // and load it as a void pointer.
179       if (!CurField->getType()->isAnyPointerType()) {
180         auto &Ctx = getContext();
181         auto DstAddr = CreateMemTemp(
182             Ctx.getUIntPtrType(),
183             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
184         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
185 
186         auto *SrcAddrVal = EmitScalarConversion(
187             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
188             Ctx.getPointerType(CurField->getType()), SourceLocation());
189         LValue SrcLV =
190             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
191 
192         // Store the value using the source type pointer.
193         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
194 
195         // Load the value using the destination type pointer.
196         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
197       }
198       CapturedVars.push_back(CV);
199     } else {
200       assert(CurCap->capturesVariable() && "Expected capture by reference.");
201       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
202     }
203   }
204 }
205 
206 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
207                                     StringRef Name, LValue AddrLV,
208                                     bool isReferenceType = false) {
209   ASTContext &Ctx = CGF.getContext();
210 
211   auto *CastedPtr = CGF.EmitScalarConversion(
212       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
213       Ctx.getPointerType(DstType), SourceLocation());
214   auto TmpAddr =
215       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
216           .getAddress();
217 
218   // If we are dealing with references we need to return the address of the
219   // reference instead of the reference of the value.
220   if (isReferenceType) {
221     QualType RefType = Ctx.getLValueReferenceType(DstType);
222     auto *RefVal = TmpAddr.getPointer();
223     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
224     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
225     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
226   }
227 
228   return TmpAddr;
229 }
230 
231 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
232   if (T->isLValueReferenceType()) {
233     return C.getLValueReferenceType(
234         getCanonicalParamType(C, T.getNonReferenceType()),
235         /*SpelledAsLValue=*/false);
236   }
237   if (T->isPointerType())
238     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
239   return C.getCanonicalParamType(T);
240 }
241 
242 namespace {
243   /// Contains required data for proper outlined function codegen.
244   struct FunctionOptions {
245     /// Captured statement for which the function is generated.
246     const CapturedStmt *S = nullptr;
247     /// true if cast to/from  UIntPtr is required for variables captured by
248     /// value.
249     const bool UIntPtrCastRequired = true;
250     /// true if only casted arguments must be registered as local args or VLA
251     /// sizes.
252     const bool RegisterCastedArgsOnly = false;
253     /// Name of the generated function.
254     const StringRef FunctionName;
255     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
256                              bool RegisterCastedArgsOnly,
257                              StringRef FunctionName)
258         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
259           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
260           FunctionName(FunctionName) {}
261   };
262 }
263 
264 static llvm::Function *emitOutlinedFunctionPrologue(
265     CodeGenFunction &CGF, FunctionArgList &Args,
266     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
267         &LocalAddrs,
268     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
269         &VLASizes,
270     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
271   const CapturedDecl *CD = FO.S->getCapturedDecl();
272   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
273   assert(CD->hasBody() && "missing CapturedDecl body");
274 
275   CXXThisValue = nullptr;
276   // Build the argument list.
277   CodeGenModule &CGM = CGF.CGM;
278   ASTContext &Ctx = CGM.getContext();
279   FunctionArgList TargetArgs;
280   Args.append(CD->param_begin(),
281               std::next(CD->param_begin(), CD->getContextParamPosition()));
282   TargetArgs.append(
283       CD->param_begin(),
284       std::next(CD->param_begin(), CD->getContextParamPosition()));
285   auto I = FO.S->captures().begin();
286   for (auto *FD : RD->fields()) {
287     QualType ArgType = FD->getType();
288     IdentifierInfo *II = nullptr;
289     VarDecl *CapVar = nullptr;
290 
291     // If this is a capture by copy and the type is not a pointer, the outlined
292     // function argument type should be uintptr and the value properly casted to
293     // uintptr. This is necessary given that the runtime library is only able to
294     // deal with pointers. We can pass in the same way the VLA type sizes to the
295     // outlined function.
296     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
297         I->capturesVariableArrayType()) {
298       if (FO.UIntPtrCastRequired)
299         ArgType = Ctx.getUIntPtrType();
300     }
301 
302     if (I->capturesVariable() || I->capturesVariableByCopy()) {
303       CapVar = I->getCapturedVar();
304       II = CapVar->getIdentifier();
305     } else if (I->capturesThis())
306       II = &Ctx.Idents.get("this");
307     else {
308       assert(I->capturesVariableArrayType());
309       II = &Ctx.Idents.get("vla");
310     }
311     if (ArgType->isVariablyModifiedType())
312       ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
313     auto *Arg =
314         ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II,
315                                   ArgType, ImplicitParamDecl::Other);
316     Args.emplace_back(Arg);
317     // Do not cast arguments if we emit function with non-original types.
318     TargetArgs.emplace_back(
319         FO.UIntPtrCastRequired
320             ? Arg
321             : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
322     ++I;
323   }
324   Args.append(
325       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
326       CD->param_end());
327   TargetArgs.append(
328       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
329       CD->param_end());
330 
331   // Create the function declaration.
332   FunctionType::ExtInfo ExtInfo;
333   const CGFunctionInfo &FuncInfo =
334       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
335   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
336 
337   llvm::Function *F =
338       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
339                              FO.FunctionName, &CGM.getModule());
340   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
341   if (CD->isNothrow())
342     F->setDoesNotThrow();
343 
344   // Generate the function.
345   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
346                     FO.S->getLocStart(), CD->getBody()->getLocStart());
347   unsigned Cnt = CD->getContextParamPosition();
348   I = FO.S->captures().begin();
349   for (auto *FD : RD->fields()) {
350     // Do not map arguments if we emit function with non-original types.
351     Address LocalAddr(Address::invalid());
352     if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
353       LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
354                                                              TargetArgs[Cnt]);
355     } else {
356       LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
357     }
358     // If we are capturing a pointer by copy we don't need to do anything, just
359     // use the value that we get from the arguments.
360     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
361       const VarDecl *CurVD = I->getCapturedVar();
362       // If the variable is a reference we need to materialize it here.
363       if (CurVD->getType()->isReferenceType()) {
364         Address RefAddr = CGF.CreateMemTemp(
365             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
366         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
367                               /*Volatile=*/false, CurVD->getType());
368         LocalAddr = RefAddr;
369       }
370       if (!FO.RegisterCastedArgsOnly)
371         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
372       ++Cnt;
373       ++I;
374       continue;
375     }
376 
377     LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
378     LValue ArgLVal =
379         CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo);
380     if (FD->hasCapturedVLAType()) {
381       if (FO.UIntPtrCastRequired) {
382         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
383                                                           Args[Cnt]->getName(),
384                                                           ArgLVal),
385                                      FD->getType(), BaseInfo);
386       }
387       auto *ExprArg =
388           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
389       auto VAT = FD->getCapturedVLAType();
390       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
391     } else if (I->capturesVariable()) {
392       auto *Var = I->getCapturedVar();
393       QualType VarTy = Var->getType();
394       Address ArgAddr = ArgLVal.getAddress();
395       if (!VarTy->isReferenceType()) {
396         if (ArgLVal.getType()->isLValueReferenceType()) {
397           ArgAddr = CGF.EmitLoadOfReference(
398               ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
399         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
400           assert(ArgLVal.getType()->isPointerType());
401           ArgAddr = CGF.EmitLoadOfPointer(
402               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
403         }
404       }
405       if (!FO.RegisterCastedArgsOnly) {
406         LocalAddrs.insert(
407             {Args[Cnt],
408              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
409       }
410     } else if (I->capturesVariableByCopy()) {
411       assert(!FD->getType()->isAnyPointerType() &&
412              "Not expecting a captured pointer.");
413       auto *Var = I->getCapturedVar();
414       QualType VarTy = Var->getType();
415       LocalAddrs.insert(
416           {Args[Cnt],
417            {Var,
418             FO.UIntPtrCastRequired
419                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
420                                        ArgLVal, VarTy->isReferenceType())
421                 : ArgLVal.getAddress()}});
422     } else {
423       // If 'this' is captured, load it into CXXThisValue.
424       assert(I->capturesThis());
425       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
426                          .getScalarVal();
427       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
428     }
429     ++Cnt;
430     ++I;
431   }
432 
433   return F;
434 }
435 
436 llvm::Function *
437 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
438   assert(
439       CapturedStmtInfo &&
440       "CapturedStmtInfo should be set when generating the captured function");
441   const CapturedDecl *CD = S.getCapturedDecl();
442   // Build the argument list.
443   bool NeedWrapperFunction =
444       getDebugInfo() &&
445       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
446   FunctionArgList Args;
447   llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
448   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
449   SmallString<256> Buffer;
450   llvm::raw_svector_ostream Out(Buffer);
451   Out << CapturedStmtInfo->getHelperName();
452   if (NeedWrapperFunction)
453     Out << "_debug__";
454   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
455                      Out.str());
456   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
457                                                    VLASizes, CXXThisValue, FO);
458   for (const auto &LocalAddrPair : LocalAddrs) {
459     if (LocalAddrPair.second.first) {
460       setAddrOfLocalVar(LocalAddrPair.second.first,
461                         LocalAddrPair.second.second);
462     }
463   }
464   for (const auto &VLASizePair : VLASizes)
465     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
466   PGO.assignRegionCounters(GlobalDecl(CD), F);
467   CapturedStmtInfo->EmitBody(*this, CD->getBody());
468   FinishFunction(CD->getBodyRBrace());
469   if (!NeedWrapperFunction)
470     return F;
471 
472   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
473                             /*RegisterCastedArgsOnly=*/true,
474                             CapturedStmtInfo->getHelperName());
475   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
476   Args.clear();
477   LocalAddrs.clear();
478   VLASizes.clear();
479   llvm::Function *WrapperF =
480       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
481                                    WrapperCGF.CXXThisValue, WrapperFO);
482   LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
483   llvm::SmallVector<llvm::Value *, 4> CallArgs;
484   for (const auto *Arg : Args) {
485     llvm::Value *CallArg;
486     auto I = LocalAddrs.find(Arg);
487     if (I != LocalAddrs.end()) {
488       LValue LV =
489           WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
490       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
491     } else {
492       auto EI = VLASizes.find(Arg);
493       if (EI != VLASizes.end())
494         CallArg = EI->second.second;
495       else {
496         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
497                                               Arg->getType(), BaseInfo);
498         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
499       }
500     }
501     CallArgs.emplace_back(CallArg);
502   }
503   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
504                                                   F, CallArgs);
505   WrapperCGF.FinishFunction();
506   return WrapperF;
507 }
508 
509 //===----------------------------------------------------------------------===//
510 //                              OpenMP Directive Emission
511 //===----------------------------------------------------------------------===//
512 void CodeGenFunction::EmitOMPAggregateAssign(
513     Address DestAddr, Address SrcAddr, QualType OriginalType,
514     const llvm::function_ref<void(Address, Address)> &CopyGen) {
515   // Perform element-by-element initialization.
516   QualType ElementTy;
517 
518   // Drill down to the base element type on both arrays.
519   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
520   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
521   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
522 
523   auto SrcBegin = SrcAddr.getPointer();
524   auto DestBegin = DestAddr.getPointer();
525   // Cast from pointer to array type to pointer to single element.
526   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
527   // The basic structure here is a while-do loop.
528   auto BodyBB = createBasicBlock("omp.arraycpy.body");
529   auto DoneBB = createBasicBlock("omp.arraycpy.done");
530   auto IsEmpty =
531       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
532   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
533 
534   // Enter the loop body, making that address the current address.
535   auto EntryBB = Builder.GetInsertBlock();
536   EmitBlock(BodyBB);
537 
538   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
539 
540   llvm::PHINode *SrcElementPHI =
541     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
542   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
543   Address SrcElementCurrent =
544       Address(SrcElementPHI,
545               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
546 
547   llvm::PHINode *DestElementPHI =
548     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
549   DestElementPHI->addIncoming(DestBegin, EntryBB);
550   Address DestElementCurrent =
551     Address(DestElementPHI,
552             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
553 
554   // Emit copy.
555   CopyGen(DestElementCurrent, SrcElementCurrent);
556 
557   // Shift the address forward by one element.
558   auto DestElementNext = Builder.CreateConstGEP1_32(
559       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
560   auto SrcElementNext = Builder.CreateConstGEP1_32(
561       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
562   // Check whether we've reached the end.
563   auto Done =
564       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
565   Builder.CreateCondBr(Done, DoneBB, BodyBB);
566   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
567   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
568 
569   // Done.
570   EmitBlock(DoneBB, /*IsFinished=*/true);
571 }
572 
573 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
574                                   Address SrcAddr, const VarDecl *DestVD,
575                                   const VarDecl *SrcVD, const Expr *Copy) {
576   if (OriginalType->isArrayType()) {
577     auto *BO = dyn_cast<BinaryOperator>(Copy);
578     if (BO && BO->getOpcode() == BO_Assign) {
579       // Perform simple memcpy for simple copying.
580       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
581     } else {
582       // For arrays with complex element types perform element by element
583       // copying.
584       EmitOMPAggregateAssign(
585           DestAddr, SrcAddr, OriginalType,
586           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
587             // Working with the single array element, so have to remap
588             // destination and source variables to corresponding array
589             // elements.
590             CodeGenFunction::OMPPrivateScope Remap(*this);
591             Remap.addPrivate(DestVD, [DestElement]() -> Address {
592               return DestElement;
593             });
594             Remap.addPrivate(
595                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
596             (void)Remap.Privatize();
597             EmitIgnoredExpr(Copy);
598           });
599     }
600   } else {
601     // Remap pseudo source variable to private copy.
602     CodeGenFunction::OMPPrivateScope Remap(*this);
603     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
604     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
605     (void)Remap.Privatize();
606     // Emit copying of the whole variable.
607     EmitIgnoredExpr(Copy);
608   }
609 }
610 
611 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
612                                                 OMPPrivateScope &PrivateScope) {
613   if (!HaveInsertPoint())
614     return false;
615   bool FirstprivateIsLastprivate = false;
616   llvm::DenseSet<const VarDecl *> Lastprivates;
617   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
618     for (const auto *D : C->varlists())
619       Lastprivates.insert(
620           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
621   }
622   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
623   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
624   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
625     auto IRef = C->varlist_begin();
626     auto InitsRef = C->inits().begin();
627     for (auto IInit : C->private_copies()) {
628       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
629       bool ThisFirstprivateIsLastprivate =
630           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
631       auto *CapFD = CapturesInfo.lookup(OrigVD);
632       auto *FD = CapturedStmtInfo->lookup(OrigVD);
633       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
634           !FD->getType()->isReferenceType()) {
635         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
636         ++IRef;
637         ++InitsRef;
638         continue;
639       }
640       FirstprivateIsLastprivate =
641           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
642       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
643         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
644         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
645         bool IsRegistered;
646         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
647                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
648                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
649         Address OriginalAddr = EmitLValue(&DRE).getAddress();
650         QualType Type = VD->getType();
651         if (Type->isArrayType()) {
652           // Emit VarDecl with copy init for arrays.
653           // Get the address of the original variable captured in current
654           // captured region.
655           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
656             auto Emission = EmitAutoVarAlloca(*VD);
657             auto *Init = VD->getInit();
658             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
659               // Perform simple memcpy.
660               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
661                                   Type);
662             } else {
663               EmitOMPAggregateAssign(
664                   Emission.getAllocatedAddress(), OriginalAddr, Type,
665                   [this, VDInit, Init](Address DestElement,
666                                        Address SrcElement) {
667                     // Clean up any temporaries needed by the initialization.
668                     RunCleanupsScope InitScope(*this);
669                     // Emit initialization for single element.
670                     setAddrOfLocalVar(VDInit, SrcElement);
671                     EmitAnyExprToMem(Init, DestElement,
672                                      Init->getType().getQualifiers(),
673                                      /*IsInitializer*/ false);
674                     LocalDeclMap.erase(VDInit);
675                   });
676             }
677             EmitAutoVarCleanups(Emission);
678             return Emission.getAllocatedAddress();
679           });
680         } else {
681           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
682             // Emit private VarDecl with copy init.
683             // Remap temp VDInit variable to the address of the original
684             // variable
685             // (for proper handling of captured global variables).
686             setAddrOfLocalVar(VDInit, OriginalAddr);
687             EmitDecl(*VD);
688             LocalDeclMap.erase(VDInit);
689             return GetAddrOfLocalVar(VD);
690           });
691         }
692         assert(IsRegistered &&
693                "firstprivate var already registered as private");
694         // Silence the warning about unused variable.
695         (void)IsRegistered;
696       }
697       ++IRef;
698       ++InitsRef;
699     }
700   }
701   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
702 }
703 
704 void CodeGenFunction::EmitOMPPrivateClause(
705     const OMPExecutableDirective &D,
706     CodeGenFunction::OMPPrivateScope &PrivateScope) {
707   if (!HaveInsertPoint())
708     return;
709   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
710   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
711     auto IRef = C->varlist_begin();
712     for (auto IInit : C->private_copies()) {
713       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
714       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
715         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
716         bool IsRegistered =
717             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
718               // Emit private VarDecl with copy init.
719               EmitDecl(*VD);
720               return GetAddrOfLocalVar(VD);
721             });
722         assert(IsRegistered && "private var already registered as private");
723         // Silence the warning about unused variable.
724         (void)IsRegistered;
725       }
726       ++IRef;
727     }
728   }
729 }
730 
731 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
732   if (!HaveInsertPoint())
733     return false;
734   // threadprivate_var1 = master_threadprivate_var1;
735   // operator=(threadprivate_var2, master_threadprivate_var2);
736   // ...
737   // __kmpc_barrier(&loc, global_tid);
738   llvm::DenseSet<const VarDecl *> CopiedVars;
739   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
740   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
741     auto IRef = C->varlist_begin();
742     auto ISrcRef = C->source_exprs().begin();
743     auto IDestRef = C->destination_exprs().begin();
744     for (auto *AssignOp : C->assignment_ops()) {
745       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
746       QualType Type = VD->getType();
747       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
748         // Get the address of the master variable. If we are emitting code with
749         // TLS support, the address is passed from the master as field in the
750         // captured declaration.
751         Address MasterAddr = Address::invalid();
752         if (getLangOpts().OpenMPUseTLS &&
753             getContext().getTargetInfo().isTLSSupported()) {
754           assert(CapturedStmtInfo->lookup(VD) &&
755                  "Copyin threadprivates should have been captured!");
756           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
757                           VK_LValue, (*IRef)->getExprLoc());
758           MasterAddr = EmitLValue(&DRE).getAddress();
759           LocalDeclMap.erase(VD);
760         } else {
761           MasterAddr =
762             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
763                                         : CGM.GetAddrOfGlobal(VD),
764                     getContext().getDeclAlign(VD));
765         }
766         // Get the address of the threadprivate variable.
767         Address PrivateAddr = EmitLValue(*IRef).getAddress();
768         if (CopiedVars.size() == 1) {
769           // At first check if current thread is a master thread. If it is, no
770           // need to copy data.
771           CopyBegin = createBasicBlock("copyin.not.master");
772           CopyEnd = createBasicBlock("copyin.not.master.end");
773           Builder.CreateCondBr(
774               Builder.CreateICmpNE(
775                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
776                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
777               CopyBegin, CopyEnd);
778           EmitBlock(CopyBegin);
779         }
780         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
781         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
782         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
783       }
784       ++IRef;
785       ++ISrcRef;
786       ++IDestRef;
787     }
788   }
789   if (CopyEnd) {
790     // Exit out of copying procedure for non-master thread.
791     EmitBlock(CopyEnd, /*IsFinished=*/true);
792     return true;
793   }
794   return false;
795 }
796 
/// Register the private copies required by the 'lastprivate' clauses of \p D
/// in \p PrivateScope.
///
/// For every distinct variable (compared by canonical declaration) the
/// clause's destination helper variable is mapped to the address of the
/// original variable, and - when a private copy expression exists and the
/// variable is not a loop counter of a SIMD directive - the original variable
/// is mapped to a freshly emitted private copy.
///
/// \returns true if \p D has at least one 'lastprivate' clause; false if it
/// has none or if there is no insertion point.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Canonical declarations of the loop counters of a SIMD directive; no
  // private copy is emitted for them in this function.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  // Variables already handled, so that a variable listed several times is
  // processed only once.
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization, it is done in
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Map the destination helper variable to the original variable's
        // address.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
854 
855 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
856     const OMPExecutableDirective &D, bool NoFinals,
857     llvm::Value *IsLastIterCond) {
858   if (!HaveInsertPoint())
859     return;
860   // Emit following code:
861   // if (<IsLastIterCond>) {
862   //   orig_var1 = private_orig_var1;
863   //   ...
864   //   orig_varn = private_orig_varn;
865   // }
866   llvm::BasicBlock *ThenBB = nullptr;
867   llvm::BasicBlock *DoneBB = nullptr;
868   if (IsLastIterCond) {
869     ThenBB = createBasicBlock(".omp.lastprivate.then");
870     DoneBB = createBasicBlock(".omp.lastprivate.done");
871     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
872     EmitBlock(ThenBB);
873   }
874   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
875   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
876   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
877     auto IC = LoopDirective->counters().begin();
878     for (auto F : LoopDirective->finals()) {
879       auto *D =
880           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
881       if (NoFinals)
882         AlreadyEmittedVars.insert(D);
883       else
884         LoopCountersAndUpdates[D] = F;
885       ++IC;
886     }
887   }
888   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
889     auto IRef = C->varlist_begin();
890     auto ISrcRef = C->source_exprs().begin();
891     auto IDestRef = C->destination_exprs().begin();
892     for (auto *AssignOp : C->assignment_ops()) {
893       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
894       QualType Type = PrivateVD->getType();
895       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
896       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
897         // If lastprivate variable is a loop control variable for loop-based
898         // directive, update its value before copyin back to original
899         // variable.
900         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
901           EmitIgnoredExpr(FinalExpr);
902         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
903         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
904         // Get the address of the original variable.
905         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
906         // Get the address of the private variable.
907         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
908         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
909           PrivateAddr =
910               Address(Builder.CreateLoad(PrivateAddr),
911                       getNaturalTypeAlignment(RefTy->getPointeeType()));
912         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
913       }
914       ++IRef;
915       ++ISrcRef;
916       ++IDestRef;
917     }
918     if (auto *PostUpdate = C->getPostUpdateExpr())
919       EmitIgnoredExpr(PostUpdate);
920   }
921   if (IsLastIterCond)
922     EmitBlock(DoneBB, /*IsFinished=*/true);
923 }
924 
/// Emit and register the private copies for all items of the 'reduction'
/// clauses of \p D.
///
/// For each reduction item the private variable is allocated and initialized,
/// the item's base declaration is registered in \p PrivateScope with the
/// (adjusted) private address, and the implicit LHS/RHS helper variables of
/// the reduction operation are registered as well: LHS with the address of the
/// shared (original) item, RHS with the address of the private copy.
/// Does nothing when there is no insertion point.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Flatten the per-clause expression lists into parallel vectors: entry i of
  // each vector describes the same reduction item.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  // Index of the current item; used to address it inside RedCG.
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    // The callback emits the private variable's own initializer.
    // NOTE(review): when exactly RedCG invokes the callback is decided by
    // ReductionCodeGen::emitInitialization, which is not visible here.
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    // Map the item's base declaration to the adjusted private address.
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    if (isa<OMPArraySectionExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      // The RHS implicit variable refers to the private copy.
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if (isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      // The RHS implicit variable refers to the private copy, viewed with the
      // memory type of the RHS variable.
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        // For arrays both helpers are accessed through the memory type of the
        // corresponding helper variable.
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1027 
1028 void CodeGenFunction::EmitOMPReductionClauseFinal(
1029     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1030   if (!HaveInsertPoint())
1031     return;
1032   llvm::SmallVector<const Expr *, 8> Privates;
1033   llvm::SmallVector<const Expr *, 8> LHSExprs;
1034   llvm::SmallVector<const Expr *, 8> RHSExprs;
1035   llvm::SmallVector<const Expr *, 8> ReductionOps;
1036   bool HasAtLeastOneReduction = false;
1037   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1038     HasAtLeastOneReduction = true;
1039     Privates.append(C->privates().begin(), C->privates().end());
1040     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1041     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1042     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1043   }
1044   if (HasAtLeastOneReduction) {
1045     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1046                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1047                       D.getDirectiveKind() == OMPD_simd;
1048     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1049     // Emit nowait reduction if nowait clause is present or directive is a
1050     // parallel directive (it always has implicit barrier).
1051     CGM.getOpenMPRuntime().emitReduction(
1052         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1053         {WithNowait, SimpleReduction, ReductionKind});
1054   }
1055 }
1056 
1057 static void emitPostUpdateForReductionClause(
1058     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1059     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1060   if (!CGF.HaveInsertPoint())
1061     return;
1062   llvm::BasicBlock *DoneBB = nullptr;
1063   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1064     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1065       if (!DoneBB) {
1066         if (auto *Cond = CondGen(CGF)) {
1067           // If the first post-update expression is found, emit conditional
1068           // block if it was requested.
1069           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1070           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1071           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1072           CGF.EmitBlock(ThenBB);
1073         }
1074       }
1075       CGF.EmitIgnoredExpr(PostUpdate);
1076     }
1077   }
1078   if (DoneBB)
1079     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1080 }
1081 
1082 namespace {
1083 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1084 /// parallel function. This is necessary for combined constructs such as
1085 /// 'distribute parallel for'
1086 typedef llvm::function_ref<void(CodeGenFunction &,
1087                                 const OMPExecutableDirective &,
1088                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1089     CodeGenBoundParametersTy;
1090 } // anonymous namespace
1091 
1092 static void emitCommonOMPParallelDirective(
1093     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1094     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1095     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1096   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1097   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1098       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1099   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1100     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1101     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1102                                          /*IgnoreResultAssign*/ true);
1103     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1104         CGF, NumThreads, NumThreadsClause->getLocStart());
1105   }
1106   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1107     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1108     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1109         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1110   }
1111   const Expr *IfCond = nullptr;
1112   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1113     if (C->getNameModifier() == OMPD_unknown ||
1114         C->getNameModifier() == OMPD_parallel) {
1115       IfCond = C->getCondition();
1116       break;
1117     }
1118   }
1119 
1120   OMPParallelScope Scope(CGF, S);
1121   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1122   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1123   // lower and upper bounds with the pragma 'for' chunking mechanism.
1124   // The following lambda takes care of appending the lower and upper bound
1125   // parameters when necessary
1126   CodeGenBoundParameters(CGF, S, CapturedVars);
1127   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1128   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1129                                               CapturedVars, IfCond);
1130 }
1131 
1132 static void emitEmptyBoundParameters(CodeGenFunction &,
1133                                      const OMPExecutableDirective &,
1134                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1135 
1136 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1137   // Emit parallel region as a standalone region.
1138   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1139     OMPPrivateScope PrivateScope(CGF);
1140     bool Copyins = CGF.EmitOMPCopyinClause(S);
1141     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1142     if (Copyins) {
1143       // Emit implicit barrier to synchronize threads and avoid data races on
1144       // propagation master's thread values of threadprivate variables to local
1145       // instances of that variables of all other implicit threads.
1146       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1147           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1148           /*ForceSimpleCall=*/true);
1149     }
1150     CGF.EmitOMPPrivateClause(S, PrivateScope);
1151     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1152     (void)PrivateScope.Privatize();
1153     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1154     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1155   };
1156   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1157                                  emitEmptyBoundParameters);
1158   emitPostUpdateForReductionClause(
1159       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1160 }
1161 
1162 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1163                                       JumpDest LoopExit) {
1164   RunCleanupsScope BodyScope(*this);
1165   // Update counters values on current iteration.
1166   for (auto I : D.updates()) {
1167     EmitIgnoredExpr(I);
1168   }
1169   // Update the linear variables.
1170   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1171     for (auto *U : C->updates())
1172       EmitIgnoredExpr(U);
1173   }
1174 
1175   // On a continue in the body, jump to the end.
1176   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1177   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1178   // Emit loop body.
1179   EmitStmt(D.getBody());
1180   // The end (updates/cleanups).
1181   EmitBlock(Continue.getBlock());
1182   BreakContinueStack.pop_back();
1183 }
1184 
/// Emit the inner loop skeleton of an OpenMP loop construct:
///
///   omp.inner.for.cond:  branch on LoopCond to body or exit
///   omp.inner.for.body:  BodyGen(*this)
///   omp.inner.for.inc:   IncExpr; PostIncGen(*this); branch to cond
///   omp.inner.for.end:   fall-through block
///
/// \param S statement used for the loop's source range (debug locations) and
/// for profile counters.
/// \param RequiresCleanup if true, the exit edge of the condition goes through
/// a dedicated "omp.inner.for.cond.cleanup" block that branches to the loop
/// exit through the pending cleanups.
/// \param LoopCond loop continuation condition.
/// \param IncExpr expression emitted in the increment block.
/// \param BodyGen generator of the loop body.
/// \param PostIncGen generator invoked right after \p IncExpr is emitted.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  // A staging block was created above: emit it and leave the loop through the
  // cleanups.
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  // 'break' leaves the loop, 'continue' jumps to the increment block.
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit the increment expression and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1233 
1234 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1235   if (!HaveInsertPoint())
1236     return false;
1237   // Emit inits for the linear variables.
1238   bool HasLinears = false;
1239   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1240     for (auto *Init : C->inits()) {
1241       HasLinears = true;
1242       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1243       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1244         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1245         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1246         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1247                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1248                         VD->getInit()->getType(), VK_LValue,
1249                         VD->getInit()->getExprLoc());
1250         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1251                                                 VD->getType()),
1252                        /*capturedByInit=*/false);
1253         EmitAutoVarCleanups(Emission);
1254       } else
1255         EmitVarDecl(*VD);
1256     }
1257     // Emit the linear steps for the linear clauses.
1258     // If a step is not constant, it is pre-calculated before the loop.
1259     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1260       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1261         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1262         // Emit calculation of the linear step.
1263         EmitIgnoredExpr(CS);
1264       }
1265   }
1266   return HasLinears;
1267 }
1268 
1269 void CodeGenFunction::EmitOMPLinearClauseFinal(
1270     const OMPLoopDirective &D,
1271     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1272   if (!HaveInsertPoint())
1273     return;
1274   llvm::BasicBlock *DoneBB = nullptr;
1275   // Emit the final values of the linear variables.
1276   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1277     auto IC = C->varlist_begin();
1278     for (auto *F : C->finals()) {
1279       if (!DoneBB) {
1280         if (auto *Cond = CondGen(*this)) {
1281           // If the first post-update expression is found, emit conditional
1282           // block if it was requested.
1283           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1284           DoneBB = createBasicBlock(".omp.linear.pu.done");
1285           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1286           EmitBlock(ThenBB);
1287         }
1288       }
1289       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1290       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1291                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1292                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1293       Address OrigAddr = EmitLValue(&DRE).getAddress();
1294       CodeGenFunction::OMPPrivateScope VarScope(*this);
1295       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1296       (void)VarScope.Privatize();
1297       EmitIgnoredExpr(F);
1298       ++IC;
1299     }
1300     if (auto *PostUpdate = C->getPostUpdateExpr())
1301       EmitIgnoredExpr(PostUpdate);
1302   }
1303   if (DoneBB)
1304     EmitBlock(DoneBB, /*IsFinished=*/true);
1305 }
1306 
1307 static void emitAlignedClause(CodeGenFunction &CGF,
1308                               const OMPExecutableDirective &D) {
1309   if (!CGF.HaveInsertPoint())
1310     return;
1311   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1312     unsigned ClauseAlignment = 0;
1313     if (auto AlignmentExpr = Clause->getAlignment()) {
1314       auto AlignmentCI =
1315           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1316       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1317     }
1318     for (auto E : Clause->varlists()) {
1319       unsigned Alignment = ClauseAlignment;
1320       if (Alignment == 0) {
1321         // OpenMP [2.8.1, Description]
1322         // If no optional parameter is specified, implementation-defined default
1323         // alignments for SIMD instructions on the target platforms are assumed.
1324         Alignment =
1325             CGF.getContext()
1326                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1327                     E->getType()->getPointeeType()))
1328                 .getQuantity();
1329       }
1330       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1331              "alignment is not power of 2");
1332       if (Alignment != 0) {
1333         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1334         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1335       }
1336     }
1337   }
1338 }
1339 
1340 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1341     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1342   if (!HaveInsertPoint())
1343     return;
1344   auto I = S.private_counters().begin();
1345   for (auto *E : S.counters()) {
1346     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1347     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1348     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1349       // Emit var without initialization.
1350       if (!LocalDeclMap.count(PrivateVD)) {
1351         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1352         EmitAutoVarCleanups(VarEmission);
1353       }
1354       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1355                       /*RefersToEnclosingVariableOrCapture=*/false,
1356                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1357       return EmitLValue(&DRE).getAddress();
1358     });
1359     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1360         VD->hasGlobalStorage()) {
1361       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1362         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1363                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1364                         E->getType(), VK_LValue, E->getExprLoc());
1365         return EmitLValue(&DRE).getAddress();
1366       });
1367     }
1368     ++I;
1369   }
1370 }
1371 
1372 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1373                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1374                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1375   if (!CGF.HaveInsertPoint())
1376     return;
1377   {
1378     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1379     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1380     (void)PreCondScope.Privatize();
1381     // Get initial values of real counters.
1382     for (auto I : S.inits()) {
1383       CGF.EmitIgnoredExpr(I);
1384     }
1385   }
1386   // Check that loop is executed at least one time.
1387   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1388 }
1389 
1390 void CodeGenFunction::EmitOMPLinearClause(
1391     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1392   if (!HaveInsertPoint())
1393     return;
1394   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1395   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1396     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1397     for (auto *C : LoopDirective->counters()) {
1398       SIMDLCVs.insert(
1399           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1400     }
1401   }
1402   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1403     auto CurPrivate = C->privates().begin();
1404     for (auto *E : C->varlists()) {
1405       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1406       auto *PrivateVD =
1407           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1408       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1409         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1410           // Emit private VarDecl with copy init.
1411           EmitVarDecl(*PrivateVD);
1412           return GetAddrOfLocalVar(PrivateVD);
1413         });
1414         assert(IsRegistered && "linear var already registered as private");
1415         // Silence the warning about unused variable.
1416         (void)IsRegistered;
1417       } else
1418         EmitVarDecl(*PrivateVD);
1419       ++CurPrivate;
1420     }
1421   }
1422 }
1423 
1424 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1425                                      const OMPExecutableDirective &D,
1426                                      bool IsMonotonic) {
1427   if (!CGF.HaveInsertPoint())
1428     return;
1429   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1430     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1431                                  /*ignoreResult=*/true);
1432     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1433     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1434     // In presence of finite 'safelen', it may be unsafe to mark all
1435     // the memory instructions parallel, because loop-carried
1436     // dependences of 'safelen' iterations are possible.
1437     if (!IsMonotonic)
1438       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1439   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1440     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1441                                  /*ignoreResult=*/true);
1442     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1443     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1444     // In presence of finite 'safelen', it may be unsafe to mark all
1445     // the memory instructions parallel, because loop-carried
1446     // dependences of 'safelen' iterations are possible.
1447     CGF.LoopStack.setParallel(false);
1448   }
1449 }
1450 
1451 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1452                                       bool IsMonotonic) {
1453   // Walk clauses and process safelen/lastprivate.
1454   LoopStack.setParallel(!IsMonotonic);
1455   LoopStack.setVectorizeEnable(true);
1456   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1457 }
1458 
/// Emit the final-value expressions (D.finals()) of the loop counters of
/// \p D.
///
/// A counter is processed only if its original variable is present in
/// LocalDeclMap, is found by CapturedStmtInfo->lookup(), has global storage,
/// or is an OMPCapturedExprDecl.
///
/// \param CondGen Callback that may produce a condition value. When it
/// returns non-null, the updates are emitted in a ".omp.final.then" block
/// that is entered only if the condition holds. It is invoked for each
/// processed counter until it first returns a non-null value.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  // Join block of the optional conditional region; stays null while no
  // condition has been generated.
  llvm::BasicBlock *DoneBB = nullptr;
  // counters() and private_counters() are advanced in lockstep with finals().
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Address that OrigVD is mapped to while F is evaluated: for a captured
      // expression declaration, the lvalue of its initializer (implicit casts
      // stripped); otherwise the lvalue of a reference to PrivateVD.
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      // Remap OrigVD to OrigAddr only for the evaluation of this final
      // expression.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  // Close the conditional region if one was opened.
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1504 
/// Emit the body of loop directive \p S (with \p LoopExit forwarded to
/// EmitOMPLoopBody) followed by a stop point for \p S. Passed as the loop
/// body generator to EmitOMPOuterLoop by EmitOMPForOuterLoop.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
1511 
/// Emit code for the simd directive \p S as an inlined region (OMPD_simd).
///
/// The region body emits the precondition check (skipped when it folds to a
/// constant), the iteration variable and iteration count, the simd loop
/// attributes, the privatized inner loop, and finally the final-value updates
/// for counters, lastprivates, reductions and linear variables.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Remains null when the precondition folds to a constant, in which case
    // no branch is emitted around the loop.
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Stage the simd loop attributes before the inner loop is emitted.
    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    (void)CGF.EmitOMPLinearClauseInit(S);
    {
      // All privatization for the loop body lives in this scope, which also
      // encloses the final-value updates emitted right after the loop.
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // The condition generators below all return null, so the final updates
      // are emitted unconditionally.
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
1593 
/// Emit the outer "dispatch" loop that hands out chunks of iterations; it is
/// the common implementation behind EmitOMPForOuterLoop and
/// EmitOMPDistributeOuterLoop.
///
/// \param DynamicOrOrdered When true, each outer iteration asks the runtime
/// for the next chunk through emitForNext(). When false, the chunk bounds are
/// maintained with the EUB / Init / Cond / NextLB / NextUB expressions of
/// \p LoopArgs and emitForStaticFinish() is emitted after the loop.
/// \param IsMonotonic Controls the parallel marking of the loop (the loop is
/// marked parallel when this is false); forwarded to EmitOMPSimdInit for simd
/// directives.
/// \param S The loop directive being emitted.
/// \param LoopScope Privatization scope of the loop; consulted to decide
/// whether exits must be staged through cleanups.
/// \param LoopArgs Bounds, stride and control expressions of the outer loop.
/// \param CodeGenLoop Generator for the inner loop body.
/// \param CodeGenOrdered Generator passed to EmitOMPInnerLoop as its
/// post-increment action.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // The result of the runtime's "next chunk" call is the loop condition.
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  // Back edge of the outer loop.
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // Only the non-dispatch case emits a static-finish call here.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
1697 
1698 void CodeGenFunction::EmitOMPForOuterLoop(
1699     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1700     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1701     const OMPLoopArguments &LoopArgs,
1702     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1703   auto &RT = CGM.getOpenMPRuntime();
1704 
1705   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1706   const bool DynamicOrOrdered =
1707       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1708 
1709   assert((Ordered ||
1710           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1711                                  LoopArgs.Chunk != nullptr)) &&
1712          "static non-chunked schedule does not need outer loop");
1713 
1714   // Emit outer loop.
1715   //
1716   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1717   // When schedule(dynamic,chunk_size) is specified, the iterations are
1718   // distributed to threads in the team in chunks as the threads request them.
1719   // Each thread executes a chunk of iterations, then requests another chunk,
1720   // until no chunks remain to be distributed. Each chunk contains chunk_size
1721   // iterations, except for the last chunk to be distributed, which may have
1722   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1723   //
1724   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1725   // to threads in the team in chunks as the executing threads request them.
1726   // Each thread executes a chunk of iterations, then requests another chunk,
1727   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1728   // each chunk is proportional to the number of unassigned iterations divided
1729   // by the number of threads in the team, decreasing to 1. For a chunk_size
1730   // with value k (greater than 1), the size of each chunk is determined in the
1731   // same way, with the restriction that the chunks do not contain fewer than k
1732   // iterations (except for the last chunk to be assigned, which may have fewer
1733   // than k iterations).
1734   //
1735   // When schedule(auto) is specified, the decision regarding scheduling is
1736   // delegated to the compiler and/or runtime system. The programmer gives the
1737   // implementation the freedom to choose any possible mapping of iterations to
1738   // threads in the team.
1739   //
1740   // When schedule(runtime) is specified, the decision regarding scheduling is
1741   // deferred until run time, and the schedule and chunk size are taken from the
1742   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1743   // implementation defined
1744   //
1745   // while(__kmpc_dispatch_next(&LB, &UB)) {
1746   //   idx = LB;
1747   //   while (idx <= UB) { BODY; ++idx;
1748   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1749   //   } // inner loop
1750   // }
1751   //
1752   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1753   // When schedule(static, chunk_size) is specified, iterations are divided into
1754   // chunks of size chunk_size, and the chunks are assigned to the threads in
1755   // the team in a round-robin fashion in the order of the thread number.
1756   //
1757   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1758   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1759   //   LB = LB + ST;
1760   //   UB = UB + ST;
1761   // }
1762   //
1763 
1764   const Expr *IVExpr = S.getIterationVariable();
1765   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1766   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1767 
1768   if (DynamicOrOrdered) {
1769     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1770     llvm::Value *LBVal = DispatchBounds.first;
1771     llvm::Value *UBVal = DispatchBounds.second;
1772     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1773                                                              LoopArgs.Chunk};
1774     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1775                            IVSigned, Ordered, DipatchRTInputValues);
1776   } else {
1777     CGOpenMPRuntime::StaticRTInput StaticInit(
1778         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1779         LoopArgs.ST, LoopArgs.Chunk);
1780     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1781                          ScheduleKind, StaticInit);
1782   }
1783 
1784   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1785                                     const unsigned IVSize,
1786                                     const bool IVSigned) {
1787     if (Ordered) {
1788       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1789                                                             IVSigned);
1790     }
1791   };
1792 
1793   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1794                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1795   OuterLoopArgs.IncExpr = S.getInc();
1796   OuterLoopArgs.Init = S.getInit();
1797   OuterLoopArgs.Cond = S.getCond();
1798   OuterLoopArgs.NextLB = S.getNextLowerBound();
1799   OuterLoopArgs.NextUB = S.getNextUpperBound();
1800   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1801                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1802 }
1803 
/// No-op "ordered" generator: emits nothing. EmitOMPDistributeOuterLoop passes
/// it to EmitOMPOuterLoop as the CodeGenOrdered callback.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
1806 
1807 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1808     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1809     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1810     const CodeGenLoopTy &CodeGenLoopContent) {
1811 
1812   auto &RT = CGM.getOpenMPRuntime();
1813 
1814   // Emit outer loop.
1815   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1816   // dynamic
1817   //
1818 
1819   const Expr *IVExpr = S.getIterationVariable();
1820   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1821   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1822 
1823   CGOpenMPRuntime::StaticRTInput StaticInit(
1824       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1825       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1826   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1827 
1828   // for combined 'distribute' and 'for' the increment expression of distribute
1829   // is store in DistInc. For 'distribute' alone, it is in Inc.
1830   Expr *IncExpr;
1831   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1832     IncExpr = S.getDistInc();
1833   else
1834     IncExpr = S.getInc();
1835 
1836   // this routine is shared by 'omp distribute parallel for' and
1837   // 'omp distribute': select the right EUB expression depending on the
1838   // directive
1839   OMPLoopArguments OuterLoopArgs;
1840   OuterLoopArgs.LB = LoopArgs.LB;
1841   OuterLoopArgs.UB = LoopArgs.UB;
1842   OuterLoopArgs.ST = LoopArgs.ST;
1843   OuterLoopArgs.IL = LoopArgs.IL;
1844   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1845   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1846                           ? S.getCombinedEnsureUpperBound()
1847                           : S.getEnsureUpperBound();
1848   OuterLoopArgs.IncExpr = IncExpr;
1849   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1850                            ? S.getCombinedInit()
1851                            : S.getInit();
1852   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1853                            ? S.getCombinedCond()
1854                            : S.getCond();
1855   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1856                              ? S.getCombinedNextLowerBound()
1857                              : S.getNextLowerBound();
1858   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1859                              ? S.getCombinedNextUpperBound()
1860                              : S.getNextUpperBound();
1861 
1862   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1863                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1864                    emitEmptyOrdered);
1865 }
1866 
1867 /// Emit a helper variable and return corresponding lvalue.
1868 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1869                                const DeclRefExpr *Helper) {
1870   auto VDecl = cast<VarDecl>(Helper->getDecl());
1871   CGF.EmitVarDecl(*VDecl);
1872   return CGF.EmitLValue(Helper);
1873 }
1874 
1875 static std::pair<LValue, LValue>
1876 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1877                                      const OMPExecutableDirective &S) {
1878   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1879   LValue LB =
1880       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1881   LValue UB =
1882       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1883 
1884   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1885   // parallel for') we need to use the 'distribute'
1886   // chunk lower and upper bounds rather than the whole loop iteration
1887   // space. These are parameters to the outlined function for 'parallel'
1888   // and we copy the bounds of the previous schedule into the
1889   // the current ones.
1890   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1891   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1892   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1893   PrevLBVal = CGF.EmitScalarConversion(
1894       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1895       LS.getIterationVariable()->getType(), SourceLocation());
1896   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1897   PrevUBVal = CGF.EmitScalarConversion(
1898       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1899       LS.getIterationVariable()->getType(), SourceLocation());
1900 
1901   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1902   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1903 
1904   return {LB, UB};
1905 }
1906 
1907 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1908 /// we need to use the LB and UB expressions generated by the worksharing
1909 /// code generation support, whereas in non combined situations we would
1910 /// just emit 0 and the LastIteration expression
1911 /// This function is necessary due to the difference of the LB and UB
1912 /// types for the RT emission routines for 'for_static_init' and
1913 /// 'for_dispatch_init'
1914 static std::pair<llvm::Value *, llvm::Value *>
1915 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1916                                         const OMPExecutableDirective &S,
1917                                         Address LB, Address UB) {
1918   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1919   const Expr *IVExpr = LS.getIterationVariable();
1920   // when implementing a dynamic schedule for a 'for' combined with a
1921   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1922   // is not normalized as each team only executes its own assigned
1923   // distribute chunk
1924   QualType IteratorTy = IVExpr->getType();
1925   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1926                                             SourceLocation());
1927   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1928                                             SourceLocation());
1929   return {LBVal, UBVal};
1930 }
1931 
1932 static void emitDistributeParallelForDistributeInnerBoundParams(
1933     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1934     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1935   const auto &Dir = cast<OMPLoopDirective>(S);
1936   LValue LB =
1937       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1938   auto LBCast = CGF.Builder.CreateIntCast(
1939       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1940   CapturedVars.push_back(LBCast);
1941   LValue UB =
1942       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1943 
1944   auto UBCast = CGF.Builder.CreateIntCast(
1945       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1946   CapturedVars.push_back(UBCast);
1947 }
1948 
/// Emit the inner 'parallel for' part of a combined 'distribute parallel for'
/// directive \p S.
///
/// The worksharing loop is emitted as an OMPD_for region inside a common
/// parallel directive; the distribute bounds are forwarded to it through
/// emitDistributeParallelForDistributeInnerBoundParams. \p LoopExit is not
/// used by this function.
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  // Region body: a worksharing loop whose bounds come from the enclosing
  // 'distribute' chunk (see the two bounds callbacks).
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S, OMPD_for, CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
1964 
/// Emit code for an OMPDistributeParallelForDirective: a distribute loop whose
/// body is generated by emitInnerParallelForWhenCombined and whose increment
/// is S.getDistInc(), emitted as an inlined OMPD_distribute region.
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  // The region is registered on the cancel stack with cancellation disabled.
  OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
                                  /*HasCancel=*/false);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
                                              /*HasCancel=*/false);
}
1977 
1978 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
1979     const OMPDistributeParallelForSimdDirective &S) {
1980   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1981   CGM.getOpenMPRuntime().emitInlinedDirective(
1982       *this, OMPD_distribute_parallel_for_simd,
1983       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1984         OMPLoopScope PreInitScope(CGF, S);
1985         CGF.EmitStmt(
1986             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1987       });
1988 }
1989 
1990 void CodeGenFunction::EmitOMPDistributeSimdDirective(
1991     const OMPDistributeSimdDirective &S) {
1992   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1993   CGM.getOpenMPRuntime().emitInlinedDirective(
1994       *this, OMPD_distribute_simd,
1995       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1996         OMPLoopScope PreInitScope(CGF, S);
1997         CGF.EmitStmt(
1998             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1999       });
2000 }
2001 
2002 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
2003     const OMPTargetParallelForSimdDirective &S) {
2004   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2005   CGM.getOpenMPRuntime().emitInlinedDirective(
2006       *this, OMPD_target_parallel_for_simd,
2007       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2008         OMPLoopScope PreInitScope(CGF, S);
2009         CGF.EmitStmt(
2010             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2011       });
2012 }
2013 
2014 void CodeGenFunction::EmitOMPTargetSimdDirective(
2015     const OMPTargetSimdDirective &S) {
2016   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2017   CGM.getOpenMPRuntime().emitInlinedDirective(
2018       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2019         OMPLoopScope PreInitScope(CGF, S);
2020         CGF.EmitStmt(
2021             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2022       });
2023 }
2024 
2025 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
2026     const OMPTeamsDistributeDirective &S) {
2027   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2028   CGM.getOpenMPRuntime().emitInlinedDirective(
2029       *this, OMPD_teams_distribute,
2030       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2031         OMPLoopScope PreInitScope(CGF, S);
2032         CGF.EmitStmt(
2033             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2034       });
2035 }
2036 
2037 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2038     const OMPTeamsDistributeSimdDirective &S) {
2039   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2040   CGM.getOpenMPRuntime().emitInlinedDirective(
2041       *this, OMPD_teams_distribute_simd,
2042       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2043         OMPLoopScope PreInitScope(CGF, S);
2044         CGF.EmitStmt(
2045             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2046       });
2047 }
2048 
2049 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2050     const OMPTeamsDistributeParallelForSimdDirective &S) {
2051   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2052   CGM.getOpenMPRuntime().emitInlinedDirective(
2053       *this, OMPD_teams_distribute_parallel_for_simd,
2054       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2055         OMPLoopScope PreInitScope(CGF, S);
2056         CGF.EmitStmt(
2057             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2058       });
2059 }
2060 
2061 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2062     const OMPTeamsDistributeParallelForDirective &S) {
2063   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2064   CGM.getOpenMPRuntime().emitInlinedDirective(
2065       *this, OMPD_teams_distribute_parallel_for,
2066       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2067         OMPLoopScope PreInitScope(CGF, S);
2068         CGF.EmitStmt(
2069             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2070       });
2071 }
2072 
2073 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2074     const OMPTargetTeamsDistributeDirective &S) {
2075   CGM.getOpenMPRuntime().emitInlinedDirective(
2076       *this, OMPD_target_teams_distribute,
2077       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2078         CGF.EmitStmt(
2079             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2080       });
2081 }
2082 
2083 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2084     const OMPTargetTeamsDistributeParallelForDirective &S) {
2085   CGM.getOpenMPRuntime().emitInlinedDirective(
2086       *this, OMPD_target_teams_distribute_parallel_for,
2087       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2088         CGF.EmitStmt(
2089             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2090       });
2091 }
2092 
2093 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2094     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2095   CGM.getOpenMPRuntime().emitInlinedDirective(
2096       *this, OMPD_target_teams_distribute_parallel_for_simd,
2097       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2098         CGF.EmitStmt(
2099             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2100       });
2101 }
2102 
2103 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2104     const OMPTargetTeamsDistributeSimdDirective &S) {
2105   CGM.getOpenMPRuntime().emitInlinedDirective(
2106       *this, OMPD_target_teams_distribute_simd,
2107       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2108         CGF.EmitStmt(
2109             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2110       });
2111 }
2112 
2113 namespace {
2114   struct ScheduleKindModifiersTy {
2115     OpenMPScheduleClauseKind Kind;
2116     OpenMPScheduleClauseModifier M1;
2117     OpenMPScheduleClauseModifier M2;
2118     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2119                             OpenMPScheduleClauseModifier M1,
2120                             OpenMPScheduleClauseModifier M2)
2121         : Kind(Kind), M1(M1), M2(M2) {}
2122   };
2123 } // namespace
2124 
2125 bool CodeGenFunction::EmitOMPWorksharingLoop(
2126     const OMPLoopDirective &S, Expr *EUB,
2127     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2128     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2129   // Emit the loop iteration variable.
2130   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2131   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2132   EmitVarDecl(*IVDecl);
2133 
2134   // Emit the iterations count variable.
2135   // If it is not a variable, Sema decided to calculate iterations count on each
2136   // iteration (e.g., it is foldable into a constant).
2137   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2138     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2139     // Emit calculation of the iterations count.
2140     EmitIgnoredExpr(S.getCalcLastIteration());
2141   }
2142 
2143   auto &RT = CGM.getOpenMPRuntime();
2144 
2145   bool HasLastprivateClause;
2146   // Check pre-condition.
2147   {
2148     OMPLoopScope PreInitScope(*this, S);
2149     // Skip the entire loop if we don't meet the precondition.
2150     // If the condition constant folds and can be elided, avoid emitting the
2151     // whole loop.
2152     bool CondConstant;
2153     llvm::BasicBlock *ContBlock = nullptr;
2154     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2155       if (!CondConstant)
2156         return false;
2157     } else {
2158       auto *ThenBlock = createBasicBlock("omp.precond.then");
2159       ContBlock = createBasicBlock("omp.precond.end");
2160       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2161                   getProfileCount(&S));
2162       EmitBlock(ThenBlock);
2163       incrementProfileCounter(&S);
2164     }
2165 
2166     bool Ordered = false;
2167     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2168       if (OrderedClause->getNumForLoops())
2169         RT.emitDoacrossInit(*this, S);
2170       else
2171         Ordered = true;
2172     }
2173 
2174     llvm::DenseSet<const Expr *> EmittedFinals;
2175     emitAlignedClause(*this, S);
2176     bool HasLinears = EmitOMPLinearClauseInit(S);
2177     // Emit helper vars inits.
2178 
2179     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2180     LValue LB = Bounds.first;
2181     LValue UB = Bounds.second;
2182     LValue ST =
2183         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2184     LValue IL =
2185         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2186 
2187     // Emit 'then' code.
2188     {
2189       OMPPrivateScope LoopScope(*this);
2190       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2191         // Emit implicit barrier to synchronize threads and avoid data races on
2192         // initialization of firstprivate variables and post-update of
2193         // lastprivate variables.
2194         CGM.getOpenMPRuntime().emitBarrierCall(
2195             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2196             /*ForceSimpleCall=*/true);
2197       }
2198       EmitOMPPrivateClause(S, LoopScope);
2199       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2200       EmitOMPReductionClauseInit(S, LoopScope);
2201       EmitOMPPrivateLoopCounters(S, LoopScope);
2202       EmitOMPLinearClause(S, LoopScope);
2203       (void)LoopScope.Privatize();
2204 
2205       // Detect the loop schedule kind and chunk.
2206       llvm::Value *Chunk = nullptr;
2207       OpenMPScheduleTy ScheduleKind;
2208       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2209         ScheduleKind.Schedule = C->getScheduleKind();
2210         ScheduleKind.M1 = C->getFirstScheduleModifier();
2211         ScheduleKind.M2 = C->getSecondScheduleModifier();
2212         if (const auto *Ch = C->getChunkSize()) {
2213           Chunk = EmitScalarExpr(Ch);
2214           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2215                                        S.getIterationVariable()->getType(),
2216                                        S.getLocStart());
2217         }
2218       }
2219       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2220       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2221       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2222       // If the static schedule kind is specified or if the ordered clause is
2223       // specified, and if no monotonic modifier is specified, the effect will
2224       // be as if the monotonic modifier was specified.
2225       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2226                                 /* Chunked */ Chunk != nullptr) &&
2227           !Ordered) {
2228         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2229           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2230         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2231         // When no chunk_size is specified, the iteration space is divided into
2232         // chunks that are approximately equal in size, and at most one chunk is
2233         // distributed to each thread. Note that the size of the chunks is
2234         // unspecified in this case.
2235         CGOpenMPRuntime::StaticRTInput StaticInit(
2236             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
2237             UB.getAddress(), ST.getAddress());
2238         RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
2239                              ScheduleKind, StaticInit);
2240         auto LoopExit =
2241             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2242         // UB = min(UB, GlobalUB);
2243         EmitIgnoredExpr(S.getEnsureUpperBound());
2244         // IV = LB;
2245         EmitIgnoredExpr(S.getInit());
2246         // while (idx <= UB) { BODY; ++idx; }
2247         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2248                          S.getInc(),
2249                          [&S, LoopExit](CodeGenFunction &CGF) {
2250                            CGF.EmitOMPLoopBody(S, LoopExit);
2251                            CGF.EmitStopPoint(&S);
2252                          },
2253                          [](CodeGenFunction &) {});
2254         EmitBlock(LoopExit.getBlock());
2255         // Tell the runtime we are done.
2256         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2257           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
2258         };
2259         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2260       } else {
2261         const bool IsMonotonic =
2262             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2263             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2264             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2265             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2266         // Emit the outer loop, which requests its work chunk [LB..UB] from
2267         // runtime and runs the inner loop to process it.
2268         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2269                                              ST.getAddress(), IL.getAddress(),
2270                                              Chunk, EUB);
2271         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2272                             LoopArguments, CGDispatchBounds);
2273       }
2274       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2275         EmitOMPSimdFinal(S,
2276                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2277                            return CGF.Builder.CreateIsNotNull(
2278                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2279                          });
2280       }
2281       EmitOMPReductionClauseFinal(
2282           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2283                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2284                  : /*Parallel only*/ OMPD_parallel);
2285       // Emit post-update of the reduction variables if IsLastIter != 0.
2286       emitPostUpdateForReductionClause(
2287           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2288             return CGF.Builder.CreateIsNotNull(
2289                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2290           });
2291       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2292       if (HasLastprivateClause)
2293         EmitOMPLastprivateClauseFinal(
2294             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2295             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2296     }
2297     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2298       return CGF.Builder.CreateIsNotNull(
2299           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2300     });
2301     // We're now done with the loop, so jump to the continuation block.
2302     if (ContBlock) {
2303       EmitBranch(ContBlock);
2304       EmitBlock(ContBlock, true);
2305     }
2306   }
2307   return HasLastprivateClause;
2308 }
2309 
2310 /// The following two functions generate expressions for the loop lower
2311 /// and upper bounds in case of static and dynamic (dispatch) schedule
2312 /// of the associated 'for' or 'distribute' loop.
2313 static std::pair<LValue, LValue>
2314 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2315   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2316   LValue LB =
2317       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2318   LValue UB =
2319       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2320   return {LB, UB};
2321 }
2322 
2323 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2324 /// consider the lower and upper bound expressions generated by the
2325 /// worksharing loop support, but we use 0 and the iteration space size as
2326 /// constants
2327 static std::pair<llvm::Value *, llvm::Value *>
2328 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2329                           Address LB, Address UB) {
2330   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2331   const Expr *IVExpr = LS.getIterationVariable();
2332   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2333   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2334   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2335   return {LBVal, UBVal};
2336 }
2337 
2338 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2339   bool HasLastprivates = false;
2340   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2341                                           PrePostActionTy &) {
2342     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2343     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2344                                                  emitForLoopBounds,
2345                                                  emitDispatchForLoopBounds);
2346   };
2347   {
2348     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2349     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2350                                                 S.hasCancel());
2351   }
2352 
2353   // Emit an implicit barrier at the end.
2354   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2355     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2356   }
2357 }
2358 
2359 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2360   bool HasLastprivates = false;
2361   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2362                                           PrePostActionTy &) {
2363     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2364                                                  emitForLoopBounds,
2365                                                  emitDispatchForLoopBounds);
2366   };
2367   {
2368     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2369     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2370   }
2371 
2372   // Emit an implicit barrier at the end.
2373   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2374     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2375   }
2376 }
2377 
2378 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2379                                 const Twine &Name,
2380                                 llvm::Value *Init = nullptr) {
2381   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2382   if (Init)
2383     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2384   return LVal;
2385 }
2386 
/// Emits the body of a 'sections'-like directive \p S as a statically
/// scheduled loop over section indices, where each iteration dispatches to one
/// section through a switch on the loop counter.
///
/// If the captured statement is a compound statement, each of its children is
/// emitted as one section; otherwise the whole statement is emitted as the
/// single section 0. Cancellation is enabled when \p S is an
/// OMPSectionsDirective or OMPParallelSectionsDirective that reports
/// hasCancel().
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // Null when the captured statement is not a compound statement.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound: index of the last section (0 for a single section).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions bound to IV and UB so that the loop condition and
    // increment below can be expressed as AST nodes.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      // The exit block doubles as the switch's default destination.
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child of the compound statement.
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // The captured statement itself is the only section.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' directives can report a cancel
  // region; any other directive kind is treated as having none.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit the barrier required because of the lastprivate variables; with
    // 'nowait' no other barrier follows the construct.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2525 
2526 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2527   {
2528     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2529     EmitSections(S);
2530   }
2531   // Emit an implicit barrier at the end.
2532   if (!S.getSingleClause<OMPNowaitClause>()) {
2533     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2534                                            OMPD_sections);
2535   }
2536 }
2537 
2538 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2539   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2540     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2541   };
2542   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2543   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2544                                               S.hasCancel());
2545 }
2546 
2547 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2548   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2549   llvm::SmallVector<const Expr *, 8> DestExprs;
2550   llvm::SmallVector<const Expr *, 8> SrcExprs;
2551   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2552   // Check if there are any 'copyprivate' clauses associated with this
2553   // 'single' construct.
2554   // Build a list of copyprivate variables along with helper expressions
2555   // (<source>, <destination>, <destination>=<source> expressions)
2556   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2557     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2558     DestExprs.append(C->destination_exprs().begin(),
2559                      C->destination_exprs().end());
2560     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2561     AssignmentOps.append(C->assignment_ops().begin(),
2562                          C->assignment_ops().end());
2563   }
2564   // Emit code for 'single' region along with 'copyprivate' clauses
2565   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2566     Action.Enter(CGF);
2567     OMPPrivateScope SingleScope(CGF);
2568     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2569     CGF.EmitOMPPrivateClause(S, SingleScope);
2570     (void)SingleScope.Privatize();
2571     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2572   };
2573   {
2574     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2575     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2576                                             CopyprivateVars, DestExprs,
2577                                             SrcExprs, AssignmentOps);
2578   }
2579   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2580   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2581   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2582     CGM.getOpenMPRuntime().emitBarrierCall(
2583         *this, S.getLocStart(),
2584         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2585   }
2586 }
2587 
2588 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2589   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2590     Action.Enter(CGF);
2591     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2592   };
2593   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2594   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2595 }
2596 
2597 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2598   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2599     Action.Enter(CGF);
2600     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2601   };
2602   Expr *Hint = nullptr;
2603   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2604     Hint = HintClause->getHint();
2605   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2606   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2607                                             S.getDirectiveName().getAsString(),
2608                                             CodeGen, S.getLocStart(), Hint);
2609 }
2610 
2611 void CodeGenFunction::EmitOMPParallelForDirective(
2612     const OMPParallelForDirective &S) {
2613   // Emit directive as a combined directive that consists of two implicit
2614   // directives: 'parallel' with 'for' directive.
2615   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2616     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2617     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2618                                emitDispatchForLoopBounds);
2619   };
2620   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2621                                  emitEmptyBoundParameters);
2622 }
2623 
2624 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2625     const OMPParallelForSimdDirective &S) {
2626   // Emit directive as a combined directive that consists of two implicit
2627   // directives: 'parallel' with 'for' directive.
2628   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2629     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2630                                emitDispatchForLoopBounds);
2631   };
2632   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2633                                  emitEmptyBoundParameters);
2634 }
2635 
2636 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2637     const OMPParallelSectionsDirective &S) {
2638   // Emit directive as a combined directive that consists of two implicit
2639   // directives: 'parallel' with 'sections' directive.
2640   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2641     CGF.EmitSections(S);
2642   };
2643   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2644                                  emitEmptyBoundParameters);
2645 }
2646 
2647 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2648                                                 const RegionCodeGenTy &BodyGen,
2649                                                 const TaskGenTy &TaskGen,
2650                                                 OMPTaskDataTy &Data) {
2651   // Emit outlined function for task construct.
2652   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2653   auto *I = CS->getCapturedDecl()->param_begin();
2654   auto *PartId = std::next(I);
2655   auto *TaskT = std::next(I, 4);
2656   // Check if the task is final
2657   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2658     // If the condition constant folds and can be elided, try to avoid emitting
2659     // the condition and the dead arm of the if/else.
2660     auto *Cond = Clause->getCondition();
2661     bool CondConstant;
2662     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2663       Data.Final.setInt(CondConstant);
2664     else
2665       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2666   } else {
2667     // By default the task is not final.
2668     Data.Final.setInt(/*IntVal=*/false);
2669   }
2670   // Check if the task has 'priority' clause.
2671   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2672     auto *Prio = Clause->getPriority();
2673     Data.Priority.setInt(/*IntVal=*/true);
2674     Data.Priority.setPointer(EmitScalarConversion(
2675         EmitScalarExpr(Prio), Prio->getType(),
2676         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2677         Prio->getExprLoc()));
2678   }
2679   // The first function argument for tasks is a thread id, the second one is a
2680   // part id (0 for tied tasks, >=0 for untied task).
2681   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2682   // Get list of private variables.
2683   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2684     auto IRef = C->varlist_begin();
2685     for (auto *IInit : C->private_copies()) {
2686       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2687       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2688         Data.PrivateVars.push_back(*IRef);
2689         Data.PrivateCopies.push_back(IInit);
2690       }
2691       ++IRef;
2692     }
2693   }
2694   EmittedAsPrivate.clear();
2695   // Get list of firstprivate variables.
2696   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2697     auto IRef = C->varlist_begin();
2698     auto IElemInitRef = C->inits().begin();
2699     for (auto *IInit : C->private_copies()) {
2700       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2701       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2702         Data.FirstprivateVars.push_back(*IRef);
2703         Data.FirstprivateCopies.push_back(IInit);
2704         Data.FirstprivateInits.push_back(*IElemInitRef);
2705       }
2706       ++IRef;
2707       ++IElemInitRef;
2708     }
2709   }
2710   // Get list of lastprivate variables (for taskloops).
2711   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2712   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2713     auto IRef = C->varlist_begin();
2714     auto ID = C->destination_exprs().begin();
2715     for (auto *IInit : C->private_copies()) {
2716       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2717       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2718         Data.LastprivateVars.push_back(*IRef);
2719         Data.LastprivateCopies.push_back(IInit);
2720       }
2721       LastprivateDstsOrigs.insert(
2722           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2723            cast<DeclRefExpr>(*IRef)});
2724       ++IRef;
2725       ++ID;
2726     }
2727   }
2728   SmallVector<const Expr *, 4> LHSs;
2729   SmallVector<const Expr *, 4> RHSs;
2730   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2731     auto IPriv = C->privates().begin();
2732     auto IRed = C->reduction_ops().begin();
2733     auto ILHS = C->lhs_exprs().begin();
2734     auto IRHS = C->rhs_exprs().begin();
2735     for (const auto *Ref : C->varlists()) {
2736       Data.ReductionVars.emplace_back(Ref);
2737       Data.ReductionCopies.emplace_back(*IPriv);
2738       Data.ReductionOps.emplace_back(*IRed);
2739       LHSs.emplace_back(*ILHS);
2740       RHSs.emplace_back(*IRHS);
2741       std::advance(IPriv, 1);
2742       std::advance(IRed, 1);
2743       std::advance(ILHS, 1);
2744       std::advance(IRHS, 1);
2745     }
2746   }
2747   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2748       *this, S.getLocStart(), LHSs, RHSs, Data);
2749   // Build list of dependences.
2750   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2751     for (auto *IRef : C->varlists())
2752       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2753   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2754       CodeGenFunction &CGF, PrePostActionTy &Action) {
2755     // Set proper addresses for generated private copies.
2756     OMPPrivateScope Scope(CGF);
2757     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2758         !Data.LastprivateVars.empty()) {
2759       enum { PrivatesParam = 2, CopyFnParam = 3 };
2760       auto *CopyFn = CGF.Builder.CreateLoad(
2761           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2762       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2763           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2764       // Map privates.
2765       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2766       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2767       CallArgs.push_back(PrivatesPtr);
2768       for (auto *E : Data.PrivateVars) {
2769         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2770         Address PrivatePtr = CGF.CreateMemTemp(
2771             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2772         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2773         CallArgs.push_back(PrivatePtr.getPointer());
2774       }
2775       for (auto *E : Data.FirstprivateVars) {
2776         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2777         Address PrivatePtr =
2778             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2779                               ".firstpriv.ptr.addr");
2780         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2781         CallArgs.push_back(PrivatePtr.getPointer());
2782       }
2783       for (auto *E : Data.LastprivateVars) {
2784         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2785         Address PrivatePtr =
2786             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2787                               ".lastpriv.ptr.addr");
2788         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2789         CallArgs.push_back(PrivatePtr.getPointer());
2790       }
2791       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2792                                                           CopyFn, CallArgs);
2793       for (auto &&Pair : LastprivateDstsOrigs) {
2794         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2795         DeclRefExpr DRE(
2796             const_cast<VarDecl *>(OrigVD),
2797             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2798                 OrigVD) != nullptr,
2799             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2800         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2801           return CGF.EmitLValue(&DRE).getAddress();
2802         });
2803       }
2804       for (auto &&Pair : PrivatePtrs) {
2805         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2806                             CGF.getContext().getDeclAlign(Pair.first));
2807         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2808       }
2809     }
2810     if (Data.Reductions) {
2811       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2812       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2813                              Data.ReductionOps);
2814       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2815           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2816       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2817         RedCG.emitSharedLValue(CGF, Cnt);
2818         RedCG.emitAggregateType(CGF, Cnt);
2819         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2820             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2821         Replacement =
2822             Address(CGF.EmitScalarConversion(
2823                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2824                         CGF.getContext().getPointerType(
2825                             Data.ReductionCopies[Cnt]->getType()),
2826                         SourceLocation()),
2827                     Replacement.getAlignment());
2828         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2829         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2830                          [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
2834         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2835                                                            RedCG, Cnt);
2836       }
2837     }
2838     // Privatize all private variables except for in_reduction items.
2839     (void)Scope.Privatize();
2840     SmallVector<const Expr *, 4> InRedVars;
2841     SmallVector<const Expr *, 4> InRedPrivs;
2842     SmallVector<const Expr *, 4> InRedOps;
2843     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2844     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2845       auto IPriv = C->privates().begin();
2846       auto IRed = C->reduction_ops().begin();
2847       auto ITD = C->taskgroup_descriptors().begin();
2848       for (const auto *Ref : C->varlists()) {
2849         InRedVars.emplace_back(Ref);
2850         InRedPrivs.emplace_back(*IPriv);
2851         InRedOps.emplace_back(*IRed);
2852         TaskgroupDescriptors.emplace_back(*ITD);
2853         std::advance(IPriv, 1);
2854         std::advance(IRed, 1);
2855         std::advance(ITD, 1);
2856       }
2857     }
2858     // Privatize in_reduction items here, because taskgroup descriptors must be
2859     // privatized earlier.
2860     OMPPrivateScope InRedScope(CGF);
2861     if (!InRedVars.empty()) {
2862       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2863       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2864         RedCG.emitSharedLValue(CGF, Cnt);
2865         RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
2868         llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
2869             CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
2870         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2871             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2872         Replacement = Address(
2873             CGF.EmitScalarConversion(
2874                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2875                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2876                 SourceLocation()),
2877             Replacement.getAlignment());
2878         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2879         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2880                               [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
2884         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2885                                                            RedCG, Cnt);
2886       }
2887     }
2888     (void)InRedScope.Privatize();
2889 
2890     Action.Enter(CGF);
2891     BodyGen(CGF);
2892   };
2893   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2894       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2895       Data.NumberOfParts);
2896   OMPLexicalScope Scope(*this, S);
2897   TaskGen(*this, OutlinedFn, Data);
2898 }
2899 
2900 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2901   // Emit outlined function for task construct.
2902   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2903   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2904   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2905   const Expr *IfCond = nullptr;
2906   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2907     if (C->getNameModifier() == OMPD_unknown ||
2908         C->getNameModifier() == OMPD_task) {
2909       IfCond = C->getCondition();
2910       break;
2911     }
2912   }
2913 
2914   OMPTaskDataTy Data;
2915   // Check if we should emit tied or untied task.
2916   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2917   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2918     CGF.EmitStmt(CS->getCapturedStmt());
2919   };
2920   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2921                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2922                             const OMPTaskDataTy &Data) {
2923     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2924                                             SharedsTy, CapturedStruct, IfCond,
2925                                             Data);
2926   };
2927   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2928 }
2929 
2930 void CodeGenFunction::EmitOMPTaskyieldDirective(
2931     const OMPTaskyieldDirective &S) {
2932   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2933 }
2934 
2935 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2936   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2937 }
2938 
2939 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2940   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2941 }
2942 
2943 void CodeGenFunction::EmitOMPTaskgroupDirective(
2944     const OMPTaskgroupDirective &S) {
2945   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2946     Action.Enter(CGF);
2947     if (const Expr *E = S.getReductionRef()) {
2948       SmallVector<const Expr *, 4> LHSs;
2949       SmallVector<const Expr *, 4> RHSs;
2950       OMPTaskDataTy Data;
2951       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
2952         auto IPriv = C->privates().begin();
2953         auto IRed = C->reduction_ops().begin();
2954         auto ILHS = C->lhs_exprs().begin();
2955         auto IRHS = C->rhs_exprs().begin();
2956         for (const auto *Ref : C->varlists()) {
2957           Data.ReductionVars.emplace_back(Ref);
2958           Data.ReductionCopies.emplace_back(*IPriv);
2959           Data.ReductionOps.emplace_back(*IRed);
2960           LHSs.emplace_back(*ILHS);
2961           RHSs.emplace_back(*IRHS);
2962           std::advance(IPriv, 1);
2963           std::advance(IRed, 1);
2964           std::advance(ILHS, 1);
2965           std::advance(IRHS, 1);
2966         }
2967       }
2968       llvm::Value *ReductionDesc =
2969           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
2970                                                            LHSs, RHSs, Data);
2971       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2972       CGF.EmitVarDecl(*VD);
2973       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
2974                             /*Volatile=*/false, E->getType());
2975     }
2976     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2977   };
2978   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2979   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2980 }
2981 
2982 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2983   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2984     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2985       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2986                                 FlushClause->varlist_end());
2987     }
2988     return llvm::None;
2989   }(), S.getLocStart());
2990 }
2991 
2992 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
2993                                             const CodeGenLoopTy &CodeGenLoop,
2994                                             Expr *IncExpr) {
2995   // Emit the loop iteration variable.
2996   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2997   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2998   EmitVarDecl(*IVDecl);
2999 
3000   // Emit the iterations count variable.
3001   // If it is not a variable, Sema decided to calculate iterations count on each
3002   // iteration (e.g., it is foldable into a constant).
3003   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3004     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3005     // Emit calculation of the iterations count.
3006     EmitIgnoredExpr(S.getCalcLastIteration());
3007   }
3008 
3009   auto &RT = CGM.getOpenMPRuntime();
3010 
3011   bool HasLastprivateClause = false;
3012   // Check pre-condition.
3013   {
3014     OMPLoopScope PreInitScope(*this, S);
3015     // Skip the entire loop if we don't meet the precondition.
3016     // If the condition constant folds and can be elided, avoid emitting the
3017     // whole loop.
3018     bool CondConstant;
3019     llvm::BasicBlock *ContBlock = nullptr;
3020     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3021       if (!CondConstant)
3022         return;
3023     } else {
3024       auto *ThenBlock = createBasicBlock("omp.precond.then");
3025       ContBlock = createBasicBlock("omp.precond.end");
3026       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3027                   getProfileCount(&S));
3028       EmitBlock(ThenBlock);
3029       incrementProfileCounter(&S);
3030     }
3031 
3032     // Emit 'then' code.
3033     {
3034       // Emit helper vars inits.
3035 
3036       LValue LB = EmitOMPHelperVar(
3037           *this, cast<DeclRefExpr>(
3038                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3039                           ? S.getCombinedLowerBoundVariable()
3040                           : S.getLowerBoundVariable())));
3041       LValue UB = EmitOMPHelperVar(
3042           *this, cast<DeclRefExpr>(
3043                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3044                           ? S.getCombinedUpperBoundVariable()
3045                           : S.getUpperBoundVariable())));
3046       LValue ST =
3047           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3048       LValue IL =
3049           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3050 
3051       OMPPrivateScope LoopScope(*this);
3052       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3053         // Emit implicit barrier to synchronize threads and avoid data races on
3054         // initialization of firstprivate variables and post-update of
3055         // lastprivate variables.
3056         CGM.getOpenMPRuntime().emitBarrierCall(
3057           *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3058           /*ForceSimpleCall=*/true);
3059       }
3060       EmitOMPPrivateClause(S, LoopScope);
3061       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3062       EmitOMPPrivateLoopCounters(S, LoopScope);
3063       (void)LoopScope.Privatize();
3064 
3065       // Detect the distribute schedule kind and chunk.
3066       llvm::Value *Chunk = nullptr;
3067       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3068       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3069         ScheduleKind = C->getDistScheduleKind();
3070         if (const auto *Ch = C->getChunkSize()) {
3071           Chunk = EmitScalarExpr(Ch);
3072           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3073           S.getIterationVariable()->getType(),
3074           S.getLocStart());
3075         }
3076       }
3077       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3078       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3079 
3080       // OpenMP [2.10.8, distribute Construct, Description]
3081       // If dist_schedule is specified, kind must be static. If specified,
3082       // iterations are divided into chunks of size chunk_size, chunks are
3083       // assigned to the teams of the league in a round-robin fashion in the
3084       // order of the team number. When no chunk_size is specified, the
3085       // iteration space is divided into chunks that are approximately equal
3086       // in size, and at most one chunk is distributed to each team of the
3087       // league. The size of the chunks is unspecified in this case.
3088       if (RT.isStaticNonchunked(ScheduleKind,
3089                                 /* Chunked */ Chunk != nullptr)) {
3090         CGOpenMPRuntime::StaticRTInput StaticInit(
3091             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3092             LB.getAddress(), UB.getAddress(), ST.getAddress());
3093         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3094                                     StaticInit);
3095         auto LoopExit =
3096             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3097         // UB = min(UB, GlobalUB);
3098         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3099                             ? S.getCombinedEnsureUpperBound()
3100                             : S.getEnsureUpperBound());
3101         // IV = LB;
3102         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3103                             ? S.getCombinedInit()
3104                             : S.getInit());
3105 
3106         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3107                          ? S.getCombinedCond()
3108                          : S.getCond();
3109 
3110         // for distribute alone,  codegen
3111         // while (idx <= UB) { BODY; ++idx; }
3112         // when combined with 'for' (e.g. as in 'distribute parallel for')
3113         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3114         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3115                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3116                            CodeGenLoop(CGF, S, LoopExit);
3117                          },
3118                          [](CodeGenFunction &) {});
3119         EmitBlock(LoopExit.getBlock());
3120         // Tell the runtime we are done.
3121         RT.emitForStaticFinish(*this, S.getLocStart());
3122       } else {
3123         // Emit the outer loop, which requests its work chunk [LB..UB] from
3124         // runtime and runs the inner loop to process it.
3125         const OMPLoopArguments LoopArguments = {
3126             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3127             Chunk};
3128         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3129                                    CodeGenLoop);
3130       }
3131 
3132       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3133       if (HasLastprivateClause)
3134         EmitOMPLastprivateClauseFinal(
3135             S, /*NoFinals=*/false,
3136             Builder.CreateIsNotNull(
3137                 EmitLoadOfScalar(IL, S.getLocStart())));
3138     }
3139 
3140     // We're now done with the loop, so jump to the continuation block.
3141     if (ContBlock) {
3142       EmitBranch(ContBlock);
3143       EmitBlock(ContBlock, true);
3144     }
3145   }
3146 }
3147 
3148 void CodeGenFunction::EmitOMPDistributeDirective(
3149     const OMPDistributeDirective &S) {
3150   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3151 
3152     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3153   };
3154   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3155   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3156                                               false);
3157 }
3158 
3159 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3160                                                    const CapturedStmt *S) {
3161   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3162   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3163   CGF.CapturedStmtInfo = &CapStmtInfo;
3164   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3165   Fn->addFnAttr(llvm::Attribute::NoInline);
3166   return Fn;
3167 }
3168 
3169 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3170   if (!S.getAssociatedStmt()) {
3171     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3172       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3173     return;
3174   }
3175   auto *C = S.getSingleClause<OMPSIMDClause>();
3176   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3177                                  PrePostActionTy &Action) {
3178     if (C) {
3179       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3180       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3181       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3182       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3183       CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3184                                                       OutlinedFn, CapturedVars);
3185     } else {
3186       Action.Enter(CGF);
3187       CGF.EmitStmt(
3188           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3189     }
3190   };
3191   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3192   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3193 }
3194 
3195 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3196                                          QualType SrcType, QualType DestType,
3197                                          SourceLocation Loc) {
3198   assert(CGF.hasScalarEvaluationKind(DestType) &&
3199          "DestType must have scalar evaluation kind.");
3200   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3201   return Val.isScalar()
3202              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3203                                         Loc)
3204              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3205                                                  DestType, Loc);
3206 }
3207 
3208 static CodeGenFunction::ComplexPairTy
3209 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3210                       QualType DestType, SourceLocation Loc) {
3211   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3212          "DestType must have complex evaluation kind.");
3213   CodeGenFunction::ComplexPairTy ComplexVal;
3214   if (Val.isScalar()) {
3215     // Convert the input element to the element type of the complex.
3216     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3217     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3218                                               DestElementType, Loc);
3219     ComplexVal = CodeGenFunction::ComplexPairTy(
3220         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3221   } else {
3222     assert(Val.isComplex() && "Must be a scalar or complex.");
3223     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3224     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3225     ComplexVal.first = CGF.EmitScalarConversion(
3226         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3227     ComplexVal.second = CGF.EmitScalarConversion(
3228         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3229   }
3230   return ComplexVal;
3231 }
3232 
3233 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3234                                   LValue LVal, RValue RVal) {
3235   if (LVal.isGlobalReg()) {
3236     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3237   } else {
3238     CGF.EmitAtomicStore(RVal, LVal,
3239                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3240                                  : llvm::AtomicOrdering::Monotonic,
3241                         LVal.isVolatile(), /*IsInit=*/false);
3242   }
3243 }
3244 
3245 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3246                                          QualType RValTy, SourceLocation Loc) {
3247   switch (getEvaluationKind(LVal.getType())) {
3248   case TEK_Scalar:
3249     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3250                                *this, RVal, RValTy, LVal.getType(), Loc)),
3251                            LVal);
3252     break;
3253   case TEK_Complex:
3254     EmitStoreOfComplex(
3255         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3256         /*isInit=*/false);
3257     break;
3258   case TEK_Aggregate:
3259     llvm_unreachable("Must be a scalar or complex.");
3260   }
3261 }
3262 
3263 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3264                                   const Expr *X, const Expr *V,
3265                                   SourceLocation Loc) {
3266   // v = x;
3267   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3268   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3269   LValue XLValue = CGF.EmitLValue(X);
3270   LValue VLValue = CGF.EmitLValue(V);
3271   RValue Res = XLValue.isGlobalReg()
3272                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3273                    : CGF.EmitAtomicLoad(
3274                          XLValue, Loc,
3275                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3276                                   : llvm::AtomicOrdering::Monotonic,
3277                          XLValue.isVolatile());
3278   // OpenMP, 2.12.6, atomic Construct
3279   // Any atomic construct with a seq_cst clause forces the atomically
3280   // performed operation to include an implicit flush operation without a
3281   // list.
3282   if (IsSeqCst)
3283     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3284   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3285 }
3286 
3287 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3288                                    const Expr *X, const Expr *E,
3289                                    SourceLocation Loc) {
3290   // x = expr;
3291   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3292   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3293   // OpenMP, 2.12.6, atomic Construct
3294   // Any atomic construct with a seq_cst clause forces the atomically
3295   // performed operation to include an implicit flush operation without a
3296   // list.
3297   if (IsSeqCst)
3298     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3299 }
3300 
3301 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3302                                                 RValue Update,
3303                                                 BinaryOperatorKind BO,
3304                                                 llvm::AtomicOrdering AO,
3305                                                 bool IsXLHSInRHSPart) {
3306   auto &Context = CGF.CGM.getContext();
3307   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3308   // expression is simple and atomic is allowed for the given type for the
3309   // target platform.
3310   if (BO == BO_Comma || !Update.isScalar() ||
3311       !Update.getScalarVal()->getType()->isIntegerTy() ||
3312       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3313                         (Update.getScalarVal()->getType() !=
3314                          X.getAddress().getElementType())) ||
3315       !X.getAddress().getElementType()->isIntegerTy() ||
3316       !Context.getTargetInfo().hasBuiltinAtomic(
3317           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3318     return std::make_pair(false, RValue::get(nullptr));
3319 
3320   llvm::AtomicRMWInst::BinOp RMWOp;
3321   switch (BO) {
3322   case BO_Add:
3323     RMWOp = llvm::AtomicRMWInst::Add;
3324     break;
3325   case BO_Sub:
3326     if (!IsXLHSInRHSPart)
3327       return std::make_pair(false, RValue::get(nullptr));
3328     RMWOp = llvm::AtomicRMWInst::Sub;
3329     break;
3330   case BO_And:
3331     RMWOp = llvm::AtomicRMWInst::And;
3332     break;
3333   case BO_Or:
3334     RMWOp = llvm::AtomicRMWInst::Or;
3335     break;
3336   case BO_Xor:
3337     RMWOp = llvm::AtomicRMWInst::Xor;
3338     break;
3339   case BO_LT:
3340     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3341                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3342                                    : llvm::AtomicRMWInst::Max)
3343                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3344                                    : llvm::AtomicRMWInst::UMax);
3345     break;
3346   case BO_GT:
3347     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3348                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3349                                    : llvm::AtomicRMWInst::Min)
3350                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3351                                    : llvm::AtomicRMWInst::UMin);
3352     break;
3353   case BO_Assign:
3354     RMWOp = llvm::AtomicRMWInst::Xchg;
3355     break;
3356   case BO_Mul:
3357   case BO_Div:
3358   case BO_Rem:
3359   case BO_Shl:
3360   case BO_Shr:
3361   case BO_LAnd:
3362   case BO_LOr:
3363     return std::make_pair(false, RValue::get(nullptr));
3364   case BO_PtrMemD:
3365   case BO_PtrMemI:
3366   case BO_LE:
3367   case BO_GE:
3368   case BO_EQ:
3369   case BO_NE:
3370   case BO_AddAssign:
3371   case BO_SubAssign:
3372   case BO_AndAssign:
3373   case BO_OrAssign:
3374   case BO_XorAssign:
3375   case BO_MulAssign:
3376   case BO_DivAssign:
3377   case BO_RemAssign:
3378   case BO_ShlAssign:
3379   case BO_ShrAssign:
3380   case BO_Comma:
3381     llvm_unreachable("Unsupported atomic update operation");
3382   }
3383   auto *UpdateVal = Update.getScalarVal();
3384   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3385     UpdateVal = CGF.Builder.CreateIntCast(
3386         IC, X.getAddress().getElementType(),
3387         X.getType()->hasSignedIntegerRepresentation());
3388   }
3389   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3390   return std::make_pair(true, RValue::get(Res));
3391 }
3392 
3393 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3394     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3395     llvm::AtomicOrdering AO, SourceLocation Loc,
3396     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3397   // Update expressions are allowed to have the following forms:
3398   // x binop= expr; -> xrval + expr;
3399   // x++, ++x -> xrval + 1;
3400   // x--, --x -> xrval - 1;
3401   // x = x binop expr; -> xrval binop expr
3402   // x = expr Op x; - > expr binop xrval;
3403   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3404   if (!Res.first) {
3405     if (X.isGlobalReg()) {
3406       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3407       // 'xrval'.
3408       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3409     } else {
3410       // Perform compare-and-swap procedure.
3411       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3412     }
3413   }
3414   return Res;
3415 }
3416 
3417 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3418                                     const Expr *X, const Expr *E,
3419                                     const Expr *UE, bool IsXLHSInRHSPart,
3420                                     SourceLocation Loc) {
3421   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3422          "Update expr in 'atomic update' must be a binary operator.");
3423   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3424   // Update expressions are allowed to have the following forms:
3425   // x binop= expr; -> xrval + expr;
3426   // x++, ++x -> xrval + 1;
3427   // x--, --x -> xrval - 1;
3428   // x = x binop expr; -> xrval binop expr
3429   // x = expr Op x; - > expr binop xrval;
3430   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3431   LValue XLValue = CGF.EmitLValue(X);
3432   RValue ExprRValue = CGF.EmitAnyExpr(E);
3433   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3434                      : llvm::AtomicOrdering::Monotonic;
3435   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3436   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3437   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3438   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3439   auto Gen =
3440       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3441         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3442         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3443         return CGF.EmitAnyExpr(UE);
3444       };
3445   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3446       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3447   // OpenMP, 2.12.6, atomic Construct
3448   // Any atomic construct with a seq_cst clause forces the atomically
3449   // performed operation to include an implicit flush operation without a
3450   // list.
3451   if (IsSeqCst)
3452     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3453 }
3454 
3455 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3456                             QualType SourceType, QualType ResType,
3457                             SourceLocation Loc) {
3458   switch (CGF.getEvaluationKind(ResType)) {
3459   case TEK_Scalar:
3460     return RValue::get(
3461         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3462   case TEK_Complex: {
3463     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3464     return RValue::getComplex(Res.first, Res.second);
3465   }
3466   case TEK_Aggregate:
3467     break;
3468   }
3469   llvm_unreachable("Must be a scalar or complex.");
3470 }
3471 
3472 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3473                                      bool IsPostfixUpdate, const Expr *V,
3474                                      const Expr *X, const Expr *E,
3475                                      const Expr *UE, bool IsXLHSInRHSPart,
3476                                      SourceLocation Loc) {
3477   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3478   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3479   RValue NewVVal;
3480   LValue VLValue = CGF.EmitLValue(V);
3481   LValue XLValue = CGF.EmitLValue(X);
3482   RValue ExprRValue = CGF.EmitAnyExpr(E);
3483   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3484                      : llvm::AtomicOrdering::Monotonic;
3485   QualType NewVValType;
3486   if (UE) {
3487     // 'x' is updated with some additional value.
3488     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3489            "Update expr in 'atomic capture' must be a binary operator.");
3490     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3491     // Update expressions are allowed to have the following forms:
3492     // x binop= expr; -> xrval + expr;
3493     // x++, ++x -> xrval + 1;
3494     // x--, --x -> xrval - 1;
3495     // x = x binop expr; -> xrval binop expr
3496     // x = expr Op x; - > expr binop xrval;
3497     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3498     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3499     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3500     NewVValType = XRValExpr->getType();
3501     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3502     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3503                   IsPostfixUpdate](RValue XRValue) -> RValue {
3504       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3505       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3506       RValue Res = CGF.EmitAnyExpr(UE);
3507       NewVVal = IsPostfixUpdate ? XRValue : Res;
3508       return Res;
3509     };
3510     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3511         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3512     if (Res.first) {
3513       // 'atomicrmw' instruction was generated.
3514       if (IsPostfixUpdate) {
3515         // Use old value from 'atomicrmw'.
3516         NewVVal = Res.second;
3517       } else {
3518         // 'atomicrmw' does not provide new value, so evaluate it using old
3519         // value of 'x'.
3520         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3521         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3522         NewVVal = CGF.EmitAnyExpr(UE);
3523       }
3524     }
3525   } else {
3526     // 'x' is simply rewritten with some 'expr'.
3527     NewVValType = X->getType().getNonReferenceType();
3528     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3529                                X->getType().getNonReferenceType(), Loc);
3530     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3531       NewVVal = XRValue;
3532       return ExprRValue;
3533     };
3534     // Try to perform atomicrmw xchg, otherwise simple exchange.
3535     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3536         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3537         Loc, Gen);
3538     if (Res.first) {
3539       // 'atomicrmw' instruction was generated.
3540       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3541     }
3542   }
3543   // Emit post-update store to 'v' of old/new 'x' value.
3544   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3545   // OpenMP, 2.12.6, atomic Construct
3546   // Any atomic construct with a seq_cst clause forces the atomically
3547   // performed operation to include an implicit flush operation without a
3548   // list.
3549   if (IsSeqCst)
3550     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3551 }
3552 
3553 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3554                               bool IsSeqCst, bool IsPostfixUpdate,
3555                               const Expr *X, const Expr *V, const Expr *E,
3556                               const Expr *UE, bool IsXLHSInRHSPart,
3557                               SourceLocation Loc) {
3558   switch (Kind) {
3559   case OMPC_read:
3560     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3561     break;
3562   case OMPC_write:
3563     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3564     break;
3565   case OMPC_unknown:
3566   case OMPC_update:
3567     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3568     break;
3569   case OMPC_capture:
3570     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3571                              IsXLHSInRHSPart, Loc);
3572     break;
3573   case OMPC_if:
3574   case OMPC_final:
3575   case OMPC_num_threads:
3576   case OMPC_private:
3577   case OMPC_firstprivate:
3578   case OMPC_lastprivate:
3579   case OMPC_reduction:
3580   case OMPC_task_reduction:
3581   case OMPC_in_reduction:
3582   case OMPC_safelen:
3583   case OMPC_simdlen:
3584   case OMPC_collapse:
3585   case OMPC_default:
3586   case OMPC_seq_cst:
3587   case OMPC_shared:
3588   case OMPC_linear:
3589   case OMPC_aligned:
3590   case OMPC_copyin:
3591   case OMPC_copyprivate:
3592   case OMPC_flush:
3593   case OMPC_proc_bind:
3594   case OMPC_schedule:
3595   case OMPC_ordered:
3596   case OMPC_nowait:
3597   case OMPC_untied:
3598   case OMPC_threadprivate:
3599   case OMPC_depend:
3600   case OMPC_mergeable:
3601   case OMPC_device:
3602   case OMPC_threads:
3603   case OMPC_simd:
3604   case OMPC_map:
3605   case OMPC_num_teams:
3606   case OMPC_thread_limit:
3607   case OMPC_priority:
3608   case OMPC_grainsize:
3609   case OMPC_nogroup:
3610   case OMPC_num_tasks:
3611   case OMPC_hint:
3612   case OMPC_dist_schedule:
3613   case OMPC_defaultmap:
3614   case OMPC_uniform:
3615   case OMPC_to:
3616   case OMPC_from:
3617   case OMPC_use_device_ptr:
3618   case OMPC_is_device_ptr:
3619     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3620   }
3621 }
3622 
3623 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3624   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3625   OpenMPClauseKind Kind = OMPC_unknown;
3626   for (auto *C : S.clauses()) {
3627     // Find first clause (skip seq_cst clause, if it is first).
3628     if (C->getClauseKind() != OMPC_seq_cst) {
3629       Kind = C->getClauseKind();
3630       break;
3631     }
3632   }
3633 
3634   const auto *CS =
3635       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3636   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3637     enterFullExpression(EWC);
3638   }
3639   // Processing for statements under 'atomic capture'.
3640   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3641     for (const auto *C : Compound->body()) {
3642       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3643         enterFullExpression(EWC);
3644       }
3645     }
3646   }
3647 
3648   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3649                                             PrePostActionTy &) {
3650     CGF.EmitStopPoint(CS);
3651     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3652                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3653                       S.isXLHSInRHSPart(), S.getLocStart());
3654   };
3655   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3656   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3657 }
3658 
3659 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3660                                          const OMPExecutableDirective &S,
3661                                          const RegionCodeGenTy &CodeGen) {
3662   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3663   CodeGenModule &CGM = CGF.CGM;
3664   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3665 
3666   llvm::Function *Fn = nullptr;
3667   llvm::Constant *FnID = nullptr;
3668 
3669   const Expr *IfCond = nullptr;
3670   // Check for the at most one if clause associated with the target region.
3671   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3672     if (C->getNameModifier() == OMPD_unknown ||
3673         C->getNameModifier() == OMPD_target) {
3674       IfCond = C->getCondition();
3675       break;
3676     }
3677   }
3678 
3679   // Check if we have any device clause associated with the directive.
3680   const Expr *Device = nullptr;
3681   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3682     Device = C->getDevice();
3683   }
3684 
3685   // Check if we have an if clause whose conditional always evaluates to false
3686   // or if we do not have any targets specified. If so the target region is not
3687   // an offload entry point.
3688   bool IsOffloadEntry = true;
3689   if (IfCond) {
3690     bool Val;
3691     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3692       IsOffloadEntry = false;
3693   }
3694   if (CGM.getLangOpts().OMPTargetTriples.empty())
3695     IsOffloadEntry = false;
3696 
3697   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3698   StringRef ParentName;
3699   // In case we have Ctors/Dtors we use the complete type variant to produce
3700   // the mangling of the device outlined kernel.
3701   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3702     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3703   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3704     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3705   else
3706     ParentName =
3707         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3708 
3709   // Emit target region as a standalone region.
3710   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3711                                                     IsOffloadEntry, CodeGen);
3712   OMPLexicalScope Scope(CGF, S);
3713   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3714   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3715   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3716                                         CapturedVars);
3717 }
3718 
3719 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3720                              PrePostActionTy &Action) {
3721   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3722   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3723   CGF.EmitOMPPrivateClause(S, PrivateScope);
3724   (void)PrivateScope.Privatize();
3725 
3726   Action.Enter(CGF);
3727   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3728 }
3729 
3730 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3731                                                   StringRef ParentName,
3732                                                   const OMPTargetDirective &S) {
3733   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3734     emitTargetRegion(CGF, S, Action);
3735   };
3736   llvm::Function *Fn;
3737   llvm::Constant *Addr;
3738   // Emit target region as a standalone region.
3739   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3740       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3741   assert(Fn && Addr && "Target device function emission failed.");
3742 }
3743 
3744 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3745   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3746     emitTargetRegion(CGF, S, Action);
3747   };
3748   emitCommonOMPTargetDirective(*this, S, CodeGen);
3749 }
3750 
3751 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3752                                         const OMPExecutableDirective &S,
3753                                         OpenMPDirectiveKind InnermostKind,
3754                                         const RegionCodeGenTy &CodeGen) {
3755   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3756   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3757       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3758 
3759   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3760   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3761   if (NT || TL) {
3762     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3763     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3764 
3765     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3766                                                   S.getLocStart());
3767   }
3768 
3769   OMPTeamsScope Scope(CGF, S);
3770   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3771   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3772   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3773                                            CapturedVars);
3774 }
3775 
3776 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3777   // Emit teams region as a standalone region.
3778   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3779     OMPPrivateScope PrivateScope(CGF);
3780     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3781     CGF.EmitOMPPrivateClause(S, PrivateScope);
3782     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3783     (void)PrivateScope.Privatize();
3784     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3785     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3786   };
3787   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3788   emitPostUpdateForReductionClause(
3789       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3790 }
3791 
3792 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3793                                   const OMPTargetTeamsDirective &S) {
3794   auto *CS = S.getCapturedStmt(OMPD_teams);
3795   Action.Enter(CGF);
3796   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3797     // TODO: Add support for clauses.
3798     CGF.EmitStmt(CS->getCapturedStmt());
3799   };
3800   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3801 }
3802 
3803 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3804     CodeGenModule &CGM, StringRef ParentName,
3805     const OMPTargetTeamsDirective &S) {
3806   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3807     emitTargetTeamsRegion(CGF, Action, S);
3808   };
3809   llvm::Function *Fn;
3810   llvm::Constant *Addr;
3811   // Emit target region as a standalone region.
3812   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3813       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3814   assert(Fn && Addr && "Target device function emission failed.");
3815 }
3816 
3817 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3818     const OMPTargetTeamsDirective &S) {
3819   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3820     emitTargetTeamsRegion(CGF, Action, S);
3821   };
3822   emitCommonOMPTargetDirective(*this, S, CodeGen);
3823 }
3824 
3825 void CodeGenFunction::EmitOMPCancellationPointDirective(
3826     const OMPCancellationPointDirective &S) {
3827   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3828                                                    S.getCancelRegion());
3829 }
3830 
3831 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3832   const Expr *IfCond = nullptr;
3833   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3834     if (C->getNameModifier() == OMPD_unknown ||
3835         C->getNameModifier() == OMPD_cancel) {
3836       IfCond = C->getCondition();
3837       break;
3838     }
3839   }
3840   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3841                                         S.getCancelRegion());
3842 }
3843 
3844 CodeGenFunction::JumpDest
3845 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3846   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3847       Kind == OMPD_target_parallel)
3848     return ReturnBlock;
3849   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3850          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3851          Kind == OMPD_distribute_parallel_for ||
3852          Kind == OMPD_target_parallel_for);
3853   return OMPCancelStack.getExitBlock();
3854 }
3855 
3856 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3857     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3858     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3859   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3860   auto OrigVarIt = C.varlist_begin();
3861   auto InitIt = C.inits().begin();
3862   for (auto PvtVarIt : C.private_copies()) {
3863     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3864     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3865     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3866 
3867     // In order to identify the right initializer we need to match the
3868     // declaration used by the mapping logic. In some cases we may get
3869     // OMPCapturedExprDecl that refers to the original declaration.
3870     const ValueDecl *MatchingVD = OrigVD;
3871     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3872       // OMPCapturedExprDecl are used to privative fields of the current
3873       // structure.
3874       auto *ME = cast<MemberExpr>(OED->getInit());
3875       assert(isa<CXXThisExpr>(ME->getBase()) &&
3876              "Base should be the current struct!");
3877       MatchingVD = ME->getMemberDecl();
3878     }
3879 
3880     // If we don't have information about the current list item, move on to
3881     // the next one.
3882     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3883     if (InitAddrIt == CaptureDeviceAddrMap.end())
3884       continue;
3885 
3886     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3887       // Initialize the temporary initialization variable with the address we
3888       // get from the runtime library. We have to cast the source address
3889       // because it is always a void *. References are materialized in the
3890       // privatization scope, so the initialization here disregards the fact
3891       // the original variable is a reference.
3892       QualType AddrQTy =
3893           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3894       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3895       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3896       setAddrOfLocalVar(InitVD, InitAddr);
3897 
3898       // Emit private declaration, it will be initialized by the value we
3899       // declaration we just added to the local declarations map.
3900       EmitDecl(*PvtVD);
3901 
3902       // The initialization variables reached its purpose in the emission
3903       // ofthe previous declaration, so we don't need it anymore.
3904       LocalDeclMap.erase(InitVD);
3905 
3906       // Return the address of the private variable.
3907       return GetAddrOfLocalVar(PvtVD);
3908     });
3909     assert(IsRegistered && "firstprivate var already registered as private");
3910     // Silence the warning about unused variable.
3911     (void)IsRegistered;
3912 
3913     ++OrigVarIt;
3914     ++InitIt;
3915   }
3916 }
3917 
3918 // Generate the instructions for '#pragma omp target data' directive.
3919 void CodeGenFunction::EmitOMPTargetDataDirective(
3920     const OMPTargetDataDirective &S) {
3921   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
3922 
3923   // Create a pre/post action to signal the privatization of the device pointer.
3924   // This action can be replaced by the OpenMP runtime code generation to
3925   // deactivate privatization.
3926   bool PrivatizeDevicePointers = false;
3927   class DevicePointerPrivActionTy : public PrePostActionTy {
3928     bool &PrivatizeDevicePointers;
3929 
3930   public:
3931     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
3932         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
3933     void Enter(CodeGenFunction &CGF) override {
3934       PrivatizeDevicePointers = true;
3935     }
3936   };
3937   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
3938 
3939   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
3940       CodeGenFunction &CGF, PrePostActionTy &Action) {
3941     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3942       CGF.EmitStmt(
3943           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3944     };
3945 
3946     // Codegen that selects wheather to generate the privatization code or not.
3947     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
3948                           &InnermostCodeGen](CodeGenFunction &CGF,
3949                                              PrePostActionTy &Action) {
3950       RegionCodeGenTy RCG(InnermostCodeGen);
3951       PrivatizeDevicePointers = false;
3952 
3953       // Call the pre-action to change the status of PrivatizeDevicePointers if
3954       // needed.
3955       Action.Enter(CGF);
3956 
3957       if (PrivatizeDevicePointers) {
3958         OMPPrivateScope PrivateScope(CGF);
3959         // Emit all instances of the use_device_ptr clause.
3960         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
3961           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
3962                                         Info.CaptureDeviceAddrMap);
3963         (void)PrivateScope.Privatize();
3964         RCG(CGF);
3965       } else
3966         RCG(CGF);
3967     };
3968 
3969     // Forward the provided action to the privatization codegen.
3970     RegionCodeGenTy PrivRCG(PrivCodeGen);
3971     PrivRCG.setAction(Action);
3972 
3973     // Notwithstanding the body of the region is emitted as inlined directive,
3974     // we don't use an inline scope as changes in the references inside the
3975     // region are expected to be visible outside, so we do not privative them.
3976     OMPLexicalScope Scope(CGF, S);
3977     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
3978                                                     PrivRCG);
3979   };
3980 
3981   RegionCodeGenTy RCG(CodeGen);
3982 
3983   // If we don't have target devices, don't bother emitting the data mapping
3984   // code.
3985   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
3986     RCG(*this);
3987     return;
3988   }
3989 
3990   // Check if we have any if clause associated with the directive.
3991   const Expr *IfCond = nullptr;
3992   if (auto *C = S.getSingleClause<OMPIfClause>())
3993     IfCond = C->getCondition();
3994 
3995   // Check if we have any device clause associated with the directive.
3996   const Expr *Device = nullptr;
3997   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3998     Device = C->getDevice();
3999 
4000   // Set the action to signal privatization of device pointers.
4001   RCG.setAction(PrivAction);
4002 
4003   // Emit region code.
4004   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4005                                              Info);
4006 }
4007 
4008 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4009     const OMPTargetEnterDataDirective &S) {
4010   // If we don't have target devices, don't bother emitting the data mapping
4011   // code.
4012   if (CGM.getLangOpts().OMPTargetTriples.empty())
4013     return;
4014 
4015   // Check if we have any if clause associated with the directive.
4016   const Expr *IfCond = nullptr;
4017   if (auto *C = S.getSingleClause<OMPIfClause>())
4018     IfCond = C->getCondition();
4019 
4020   // Check if we have any device clause associated with the directive.
4021   const Expr *Device = nullptr;
4022   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4023     Device = C->getDevice();
4024 
4025   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4026 }
4027 
4028 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4029     const OMPTargetExitDataDirective &S) {
4030   // If we don't have target devices, don't bother emitting the data mapping
4031   // code.
4032   if (CGM.getLangOpts().OMPTargetTriples.empty())
4033     return;
4034 
4035   // Check if we have any if clause associated with the directive.
4036   const Expr *IfCond = nullptr;
4037   if (auto *C = S.getSingleClause<OMPIfClause>())
4038     IfCond = C->getCondition();
4039 
4040   // Check if we have any device clause associated with the directive.
4041   const Expr *Device = nullptr;
4042   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4043     Device = C->getDevice();
4044 
4045   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4046 }
4047 
4048 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4049                                      const OMPTargetParallelDirective &S,
4050                                      PrePostActionTy &Action) {
4051   // Get the captured statement associated with the 'parallel' region.
4052   auto *CS = S.getCapturedStmt(OMPD_parallel);
4053   Action.Enter(CGF);
4054   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4055     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4056     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4057     CGF.EmitOMPPrivateClause(S, PrivateScope);
4058     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4059     (void)PrivateScope.Privatize();
4060     // TODO: Add support for clauses.
4061     CGF.EmitStmt(CS->getCapturedStmt());
4062     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4063   };
4064   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4065                                  emitEmptyBoundParameters);
4066   emitPostUpdateForReductionClause(
4067       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4068 }
4069 
4070 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4071     CodeGenModule &CGM, StringRef ParentName,
4072     const OMPTargetParallelDirective &S) {
4073   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4074     emitTargetParallelRegion(CGF, S, Action);
4075   };
4076   llvm::Function *Fn;
4077   llvm::Constant *Addr;
4078   // Emit target region as a standalone region.
4079   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4080       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4081   assert(Fn && Addr && "Target device function emission failed.");
4082 }
4083 
4084 void CodeGenFunction::EmitOMPTargetParallelDirective(
4085     const OMPTargetParallelDirective &S) {
4086   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4087     emitTargetParallelRegion(CGF, S, Action);
4088   };
4089   emitCommonOMPTargetDirective(*this, S, CodeGen);
4090 }
4091 
/// Emit code for '#pragma omp target parallel for'.
///
/// Not implemented yet: the body is empty, so no code is emitted and \p S is
/// ignored.
void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}
4096 
4097 /// Emit a helper variable and return corresponding lvalue.
4098 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4099                      const ImplicitParamDecl *PVD,
4100                      CodeGenFunction::OMPPrivateScope &Privates) {
4101   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4102   Privates.addPrivate(
4103       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4104 }
4105 
4106 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4107   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4108   // Emit outlined function for task construct.
4109   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
4110   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4111   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4112   const Expr *IfCond = nullptr;
4113   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4114     if (C->getNameModifier() == OMPD_unknown ||
4115         C->getNameModifier() == OMPD_taskloop) {
4116       IfCond = C->getCondition();
4117       break;
4118     }
4119   }
4120 
4121   OMPTaskDataTy Data;
4122   // Check if taskloop must be emitted without taskgroup.
4123   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4124   // TODO: Check if we should emit tied or untied task.
4125   Data.Tied = true;
4126   // Set scheduling for taskloop
4127   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4128     // grainsize clause
4129     Data.Schedule.setInt(/*IntVal=*/false);
4130     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4131   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4132     // num_tasks clause
4133     Data.Schedule.setInt(/*IntVal=*/true);
4134     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4135   }
4136 
4137   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4138     // if (PreCond) {
4139     //   for (IV in 0..LastIteration) BODY;
4140     //   <Final counter/linear vars updates>;
4141     // }
4142     //
4143 
4144     // Emit: if (PreCond) - begin.
4145     // If the condition constant folds and can be elided, avoid emitting the
4146     // whole loop.
4147     bool CondConstant;
4148     llvm::BasicBlock *ContBlock = nullptr;
4149     OMPLoopScope PreInitScope(CGF, S);
4150     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4151       if (!CondConstant)
4152         return;
4153     } else {
4154       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4155       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4156       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4157                   CGF.getProfileCount(&S));
4158       CGF.EmitBlock(ThenBlock);
4159       CGF.incrementProfileCounter(&S);
4160     }
4161 
4162     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4163       CGF.EmitOMPSimdInit(S);
4164 
4165     OMPPrivateScope LoopScope(CGF);
4166     // Emit helper vars inits.
4167     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4168     auto *I = CS->getCapturedDecl()->param_begin();
4169     auto *LBP = std::next(I, LowerBound);
4170     auto *UBP = std::next(I, UpperBound);
4171     auto *STP = std::next(I, Stride);
4172     auto *LIP = std::next(I, LastIter);
4173     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4174              LoopScope);
4175     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4176              LoopScope);
4177     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4178     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4179              LoopScope);
4180     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4181     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4182     (void)LoopScope.Privatize();
4183     // Emit the loop iteration variable.
4184     const Expr *IVExpr = S.getIterationVariable();
4185     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4186     CGF.EmitVarDecl(*IVDecl);
4187     CGF.EmitIgnoredExpr(S.getInit());
4188 
4189     // Emit the iterations count variable.
4190     // If it is not a variable, Sema decided to calculate iterations count on
4191     // each iteration (e.g., it is foldable into a constant).
4192     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4193       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4194       // Emit calculation of the iterations count.
4195       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4196     }
4197 
4198     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4199                          S.getInc(),
4200                          [&S](CodeGenFunction &CGF) {
4201                            CGF.EmitOMPLoopBody(S, JumpDest());
4202                            CGF.EmitStopPoint(&S);
4203                          },
4204                          [](CodeGenFunction &) {});
4205     // Emit: if (PreCond) - end.
4206     if (ContBlock) {
4207       CGF.EmitBranch(ContBlock);
4208       CGF.EmitBlock(ContBlock, true);
4209     }
4210     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4211     if (HasLastprivateClause) {
4212       CGF.EmitOMPLastprivateClauseFinal(
4213           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4214           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4215               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4216               (*LIP)->getType(), S.getLocStart())));
4217     }
4218   };
4219   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4220                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4221                             const OMPTaskDataTy &Data) {
4222     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4223       OMPLoopScope PreInitScope(CGF, S);
4224       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4225                                                   OutlinedFn, SharedsTy,
4226                                                   CapturedStruct, IfCond, Data);
4227     };
4228     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4229                                                     CodeGen);
4230   };
4231   if (Data.Nogroup)
4232     EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4233   else {
4234     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4235         *this,
4236         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4237                                         PrePostActionTy &Action) {
4238           Action.Enter(CGF);
4239           CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4240         },
4241         S.getLocStart());
4242   }
4243 }
4244 
4245 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4246   EmitOMPTaskLoopBasedDirective(S);
4247 }
4248 
4249 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4250     const OMPTaskLoopSimdDirective &S) {
4251   EmitOMPTaskLoopBasedDirective(S);
4252 }
4253 
4254 // Generate the instructions for '#pragma omp target update' directive.
4255 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4256     const OMPTargetUpdateDirective &S) {
4257   // If we don't have target devices, don't bother emitting the data mapping
4258   // code.
4259   if (CGM.getLangOpts().OMPTargetTriples.empty())
4260     return;
4261 
4262   // Check if we have any if clause associated with the directive.
4263   const Expr *IfCond = nullptr;
4264   if (auto *C = S.getSingleClause<OMPIfClause>())
4265     IfCond = C->getCondition();
4266 
4267   // Check if we have any device clause associated with the directive.
4268   const Expr *Device = nullptr;
4269   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4270     Device = C->getDevice();
4271 
4272   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4273 }
4274