1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emits the pre-init statements attached to the directive's clauses:
  /// helper variables generated by Sema for captured expressions. Variables
  /// carrying OMPCaptureNoInitAttr are allocated (with cleanups registered)
  /// but their initializer is deliberately skipped.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // Allocate the variable and register cleanups, but do not run
              // its initializer: the capture is initialized elsewhere.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  /// Remaps the captured variables of the associated region to the addresses
  /// of their shared originals.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the current context: a lambda,
  /// a captured statement, or any enclosing block.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  /// \param CapturedRegion If set, the captures of that region of \p S are
  ///        privatized so references inside the inlined directive resolve to
  ///        the shared originals; if None, only pre-init handling is done.
  /// \param EmitPreInitStmt Whether the clauses' pre-init statements should
  ///        be emitted within this scope.
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        // Reference the variable through the enclosing capture machinery when
        // it is captured there (or is a captured global); otherwise address
        // it directly.
        DeclRefExpr DRE(
            const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        // NOTE(review): the lambda captures the loop-local DRE by reference;
        // this assumes addPrivate invokes the generator before DRE goes out
        // of scope — confirm against OMPPrivateScope::addPrivate.
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
87 
88 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
89 /// for captured expressions.
90 class OMPParallelScope final : public OMPLexicalScope {
91   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
92     OpenMPDirectiveKind Kind = S.getDirectiveKind();
93     return !(isOpenMPTargetExecutionDirective(Kind) ||
94              isOpenMPLoopBoundSharingDirective(Kind)) &&
95            isOpenMPParallelDirective(Kind);
96   }
97 
98 public:
99   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
100       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
101                         EmitPreInitStmt(S)) {}
102 };
103 
104 /// Lexical scope for OpenMP teams construct, that handles correct codegen
105 /// for captured expressions.
106 class OMPTeamsScope final : public OMPLexicalScope {
107   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
108     OpenMPDirectiveKind Kind = S.getDirectiveKind();
109     return !isOpenMPTargetExecutionDirective(Kind) &&
110            isOpenMPTeamsDirective(Kind);
111   }
112 
113 public:
114   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
115       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
116                         EmitPreInitStmt(S)) {}
117 };
118 
119 /// Private scope for OpenMP loop-based directives, that supports capturing
120 /// of used expression from loop statement.
121 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
122   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
123     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
124     for (auto *E : S.counters()) {
125       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
126       (void)PreCondScope.addPrivate(VD, [&CGF, VD]() {
127         return CGF.CreateMemTemp(VD->getType().getNonReferenceType());
128       });
129     }
130     (void)PreCondScope.Privatize();
131     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
132       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
133         for (const auto *I : PreInits->decls())
134           CGF.EmitVarDecl(cast<VarDecl>(*I));
135       }
136     }
137   }
138 
139 public:
140   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
141       : CodeGenFunction::RunCleanupsScope(CGF) {
142     emitPreInitStmt(CGF, S);
143   }
144 };
145 
/// Lexical scope for simd-based OpenMP directives: emits the clauses'
/// pre-init statements, use_device_ptr helper declarations, private-clause
/// copies (for non-simd directives), the taskgroup reduction descriptor when
/// present, and remaps every captured variable of the (possibly nested)
/// associated captured statements so the body can be emitted inline.
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  /// Remaps captured variables to the addresses of their shared originals.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the current context: a lambda, a
  /// captured statement, or a block that captures the variable.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    // Emit helper variables for the clauses' captured expressions; variables
    // with OMPCaptureNoInitAttr are allocated without running an initializer.
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        // Declare the captured-expression helpers for use_device_ptr
        // operands.
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    // Non-simd directives emit their private-clause copies in this scope.
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    // A taskgroup with a task reduction needs its reduction descriptor
    // variable declared here.
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Walk all nested captured statements and remap every captured variable
    // to the address of the shared original.
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          // NOTE(review): capturing the loop-local DRE by reference assumes
          // addPrivate evaluates the generator eagerly — confirm against
          // OMPPrivateScope::addPrivate.
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress();
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
210 
211 } // namespace
212 
213 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
214                                          const OMPExecutableDirective &S,
215                                          const RegionCodeGenTy &CodeGen);
216 
217 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
218   if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
219     if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
220       OrigVD = OrigVD->getCanonicalDecl();
221       bool IsCaptured =
222           LambdaCaptureFields.lookup(OrigVD) ||
223           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
224           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
225       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
226                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
227       return EmitLValue(&DRE);
228     }
229   }
230   return EmitLValue(E);
231 }
232 
/// Computes the size of \p Ty in bytes as an llvm::Value. Constant-sized
/// types yield a constant; variable-length arrays yield a product of the
/// runtime dimension sizes and the element size.
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    // Peel off every VLA dimension, accumulating the product of the runtime
    // element counts; Ty ends up as the innermost (non-VLA) element type.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    // A still-zero element size means a zero-sized type overall. Note this
    // early return also covers the case where the loop never ran and Size is
    // still null, so the multiply below only sees a non-null Size.
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    // Total bytes = (product of VLA dimensions) * element size.
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}
252 
/// Collects, in \p CapturedVars, the values to pass to the outlined function
/// for captured statement \p S: VLA sizes, 'this', by-copy captures (cast
/// through uintptr when not already pointer-typed, since the runtime only
/// traffics in pointer-sized values), and addresses of by-reference captures.
/// Walks the record fields, the captures, and the capture initializers in
/// lockstep — they are guaranteed to be parallel sequences.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // VLA dimensions are passed as their previously computed size values.
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // Reinterpret the uintptr temporary as storage of the source type so
        // the value can be stored with its original representation.
        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      // By-reference captures pass the address of the original variable.
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}
298 
299 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
300                                     QualType DstType, StringRef Name,
301                                     LValue AddrLV,
302                                     bool isReferenceType = false) {
303   ASTContext &Ctx = CGF.getContext();
304 
305   auto *CastedPtr = CGF.EmitScalarConversion(AddrLV.getAddress().getPointer(),
306                                              Ctx.getUIntPtrType(),
307                                              Ctx.getPointerType(DstType), Loc);
308   auto TmpAddr =
309       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
310           .getAddress();
311 
312   // If we are dealing with references we need to return the address of the
313   // reference instead of the reference of the value.
314   if (isReferenceType) {
315     QualType RefType = Ctx.getLValueReferenceType(DstType);
316     auto *RefVal = TmpAddr.getPointer();
317     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
318     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
319     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
320   }
321 
322   return TmpAddr;
323 }
324 
325 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
326   if (T->isLValueReferenceType()) {
327     return C.getLValueReferenceType(
328         getCanonicalParamType(C, T.getNonReferenceType()),
329         /*SpelledAsLValue=*/false);
330   }
331   if (T->isPointerType())
332     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
333   if (auto *A = T->getAsArrayTypeUnsafe()) {
334     if (auto *VLA = dyn_cast<VariableArrayType>(A))
335       return getCanonicalParamType(C, VLA->getElementType());
336     else if (!A->isVariablyModifiedType())
337       return C.getCanonicalType(T);
338   }
339   return C.getCanonicalParamType(T);
340 }
341 
namespace {
  /// Contains required data for proper outlined function codegen.
  struct FunctionOptions {
    /// Captured statement for which the function is generated.
    const CapturedStmt *S = nullptr;
    /// true if cast to/from UIntPtr is required for variables captured by
    /// value.
    const bool UIntPtrCastRequired = true;
    /// true if only casted arguments must be registered as local args or VLA
    /// sizes. Only meaningful when UIntPtrCastRequired is also true; the
    /// constructor forces it to false otherwise.
    const bool RegisterCastedArgsOnly = false;
    /// Name of the generated function.
    const StringRef FunctionName;
    explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                             bool RegisterCastedArgsOnly,
                             StringRef FunctionName)
        : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
          RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
          FunctionName(FunctionName) {}
  };
}
363 
/// Creates the outlined function for captured statement \p FO.S, emits its
/// prologue in \p CGF, and records how the captures map onto the arguments.
///
/// \param Args [out] Complete argument list of the outlined function.
/// \param LocalAddrs [out] Maps each capture's argument decl to the captured
///        variable (null for 'this') and its local address in the function.
/// \param VLASizes [out] Maps each VLA-size argument decl to the size
///        expression and its loaded value.
/// \param CXXThisValue [out] Set to the loaded 'this' value when captured,
///        otherwise left null.
/// \returns the newly created function, positioned after StartFunction.
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list: parameters before the context parameter, then
  // one argument per captured field, then the trailing parameters. Args keeps
  // the original parameter decls; TargetArgs may hold translated ones when
  // emitting with non-original (debug) types.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    // Synthesize a function decl to own ParmVarDecls with the captures'
    // original source locations, so debug info points at the right places.
    FunctionProtoType::ExtProtoInfo EPI;
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
        SourceLocation(), DeclarationName(), Ctx.VoidTy,
        Ctx.getTrivialTypeSourceInfo(
            Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
        SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      // Use a real ParmVarDecl with source locations for better debug info.
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getLocStart() : FD->getLocStart(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  // Second pass over the captures: record local addresses and VLA sizes for
  // each argument, walking fields, captures and arguments in lockstep.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // Load the VLA dimension value (cast back from uintptr if needed) and
      // record it so array emission can look it up by size expression.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      auto *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      // By-reference capture: chase the reference/pointer down to the
      // variable's actual storage before registering its address.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var, FO.UIntPtrCastRequired
                     ? castValueFromUintptr(CGF, I->getLocation(),
                                            FD->getType(), Args[Cnt]->getName(),
                                            ArgLVal, VarTy->isReferenceType())
                     : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
548 
/// Generates the outlined function for captured statement \p S. Without
/// debug info a single function with uintptr-cast parameters (the interface
/// the OpenMP runtime expects) is emitted. With debug info the body is
/// emitted into a "_debug__"-suffixed function using the captures' original
/// types, and the returned wrapper function keeps the runtime interface and
/// simply forwards its (casted) arguments to the debug function.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  // Register the addresses/VLA sizes computed by the prologue before the body
  // is emitted, so references to captures resolve to the local copies.
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Emit the wrapper: same captures, uintptr-cast parameters, registering
  // only the casted arguments; it loads each argument and calls F.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      // Casted capture: load it with the captured variable's original type.
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        // Plain argument: reload it from its local slot.
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
623 
624 //===----------------------------------------------------------------------===//
625 //                              OpenMP Directive Emission
626 //===----------------------------------------------------------------------===//
/// Emits an element-by-element copy loop from \p SrcAddr to \p DestAddr for
/// an array of type \p OriginalType, invoking \p CopyGen once per element
/// pair. The generated IR is an emptiness-guarded do-while loop driven by
/// two PHI nodes over the source and destination element pointers.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element; the back-edge
  // incoming values are added after the body is emitted (see below), since
  // CopyGen may change the insertion block.
  llvm::PHINode *SrcElementPHI =
    Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
    Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
    Address(DestElementPHI,
            DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
687 
688 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
689                                   Address SrcAddr, const VarDecl *DestVD,
690                                   const VarDecl *SrcVD, const Expr *Copy) {
691   if (OriginalType->isArrayType()) {
692     auto *BO = dyn_cast<BinaryOperator>(Copy);
693     if (BO && BO->getOpcode() == BO_Assign) {
694       // Perform simple memcpy for simple copying.
695       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
696     } else {
697       // For arrays with complex element types perform element by element
698       // copying.
699       EmitOMPAggregateAssign(
700           DestAddr, SrcAddr, OriginalType,
701           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
702             // Working with the single array element, so have to remap
703             // destination and source variables to corresponding array
704             // elements.
705             CodeGenFunction::OMPPrivateScope Remap(*this);
706             Remap.addPrivate(DestVD, [DestElement]() -> Address {
707               return DestElement;
708             });
709             Remap.addPrivate(
710                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
711             (void)Remap.Privatize();
712             EmitIgnoredExpr(Copy);
713           });
714     }
715   } else {
716     // Remap pseudo source variable to private copy.
717     CodeGenFunction::OMPPrivateScope Remap(*this);
718     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
719     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
720     (void)Remap.Privatize();
721     // Emit copying of the whole variable.
722     EmitIgnoredExpr(Copy);
723   }
724 }
725 
/// Emits private copies for all 'firstprivate' variables of directive \a D and
/// registers them in \a PrivateScope.
/// \returns true if at least one emitted firstprivate variable is also listed
/// in a 'lastprivate' clause (callers then need lastprivate handling).
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect the canonical declarations of all 'lastprivate' variables so that
  // firstprivate/lastprivate overlap can be detected below.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // A variable captured by value into an outlined function already acts
      // as a firstprivate copy; skip emitting a second copy for it.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Emit each variable only once even if it appears in several clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Build a reference to the original variable to get its address in
        // the current captured region.
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              // Non-trivial element construction: initialize the array
              // element by element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  // Lastprivate handling is only relevant if some copy was actually emitted.
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
822 
823 void CodeGenFunction::EmitOMPPrivateClause(
824     const OMPExecutableDirective &D,
825     CodeGenFunction::OMPPrivateScope &PrivateScope) {
826   if (!HaveInsertPoint())
827     return;
828   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
829   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
830     auto IRef = C->varlist_begin();
831     for (auto IInit : C->private_copies()) {
832       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
833       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
834         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
835         bool IsRegistered =
836             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
837               // Emit private VarDecl with copy init.
838               EmitDecl(*VD);
839               return GetAddrOfLocalVar(VD);
840             });
841         assert(IsRegistered && "private var already registered as private");
842         // Silence the warning about unused variable.
843         (void)IsRegistered;
844       }
845       ++IRef;
846     }
847   }
848 }
849 
/// Emits copying for 'copyin' threadprivate variables: each thread's
/// threadprivate copy is assigned from the master thread's copy.
/// \returns true if any copying code was emitted; the caller is then expected
/// to emit an implicit barrier before threads proceed.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Generated pattern:
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      // Each variable is copied only once even if listed repeatedly.
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          // Drop the local mapping so later references resolve to the
          // threadprivate copy, not the master's field.
          LocalDeclMap.erase(VD);
        } else {
          // Without TLS, the master copy is the global/static-local variable
          // itself.
          MasterAddr =
            Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                        : CGM.GetAddrOfGlobal(VD),
                    getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data: in the master thread the private address and
          // the master address compare equal.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
915 
/// Emits initialization for 'lastprivate' variables of directive \a D:
/// registers the address of each original variable (needed for the final
/// copy-back) and emits the private copies, both into \a PrivateScope.
/// \returns true if the directive has at least one 'lastprivate' clause.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // For simd directives collect the loop control variables: their private
  // copies are handled by the simd codegen itself and are skipped below.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloop lastprivates are initialized by the runtime support library,
    // so only record that the clause exists and stop.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Map the pseudo destination variable to the real address of the
        // original variable so the final copy-back can reach it.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
974 
/// Emits the final copy of 'lastprivate' variables back into the original
/// variables. If \a IsLastIterCond is non-null, the copies are guarded by it
/// so only the thread that executed the last iteration performs them.
/// \param NoFinals If true, loop counters are treated as already finalized
/// and no 'final' update expressions are emitted for them.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // Map each loop counter either to its 'final' update expression, or mark it
  // as already emitted when no finals are required.
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      // Copy each variable back only once.
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // For by-reference privates, load the referenced address first.
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1044 
1045 void CodeGenFunction::EmitOMPReductionClauseInit(
1046     const OMPExecutableDirective &D,
1047     CodeGenFunction::OMPPrivateScope &PrivateScope) {
1048   if (!HaveInsertPoint())
1049     return;
1050   SmallVector<const Expr *, 4> Shareds;
1051   SmallVector<const Expr *, 4> Privates;
1052   SmallVector<const Expr *, 4> ReductionOps;
1053   SmallVector<const Expr *, 4> LHSs;
1054   SmallVector<const Expr *, 4> RHSs;
1055   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1056     auto IPriv = C->privates().begin();
1057     auto IRed = C->reduction_ops().begin();
1058     auto ILHS = C->lhs_exprs().begin();
1059     auto IRHS = C->rhs_exprs().begin();
1060     for (const auto *Ref : C->varlists()) {
1061       Shareds.emplace_back(Ref);
1062       Privates.emplace_back(*IPriv);
1063       ReductionOps.emplace_back(*IRed);
1064       LHSs.emplace_back(*ILHS);
1065       RHSs.emplace_back(*IRHS);
1066       std::advance(IPriv, 1);
1067       std::advance(IRed, 1);
1068       std::advance(ILHS, 1);
1069       std::advance(IRHS, 1);
1070     }
1071   }
1072   ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
1073   unsigned Count = 0;
1074   auto ILHS = LHSs.begin();
1075   auto IRHS = RHSs.begin();
1076   auto IPriv = Privates.begin();
1077   for (const auto *IRef : Shareds) {
1078     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1079     // Emit private VarDecl with reduction init.
1080     RedCG.emitSharedLValue(*this, Count);
1081     RedCG.emitAggregateType(*this, Count);
1082     auto Emission = EmitAutoVarAlloca(*PrivateVD);
1083     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1084                              RedCG.getSharedLValue(Count),
1085                              [&Emission](CodeGenFunction &CGF) {
1086                                CGF.EmitAutoVarInit(Emission);
1087                                return true;
1088                              });
1089     EmitAutoVarCleanups(Emission);
1090     Address BaseAddr = RedCG.adjustPrivateAddress(
1091         *this, Count, Emission.getAllocatedAddress());
1092     bool IsRegistered = PrivateScope.addPrivate(
1093         RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
1094     assert(IsRegistered && "private var already registered as private");
1095     // Silence the warning about unused variable.
1096     (void)IsRegistered;
1097 
1098     auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1099     auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1100     QualType Type = PrivateVD->getType();
1101     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1102     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1103       // Store the address of the original variable associated with the LHS
1104       // implicit variable.
1105       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1106         return RedCG.getSharedLValue(Count).getAddress();
1107       });
1108       PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1109         return GetAddrOfLocalVar(PrivateVD);
1110       });
1111     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1112                isa<ArraySubscriptExpr>(IRef)) {
1113       // Store the address of the original variable associated with the LHS
1114       // implicit variable.
1115       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
1116         return RedCG.getSharedLValue(Count).getAddress();
1117       });
1118       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1119         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1120                                             ConvertTypeForMem(RHSVD->getType()),
1121                                             "rhs.begin");
1122       });
1123     } else {
1124       QualType Type = PrivateVD->getType();
1125       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1126       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1127       // Store the address of the original variable associated with the LHS
1128       // implicit variable.
1129       if (IsArray) {
1130         OriginalAddr = Builder.CreateElementBitCast(
1131             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1132       }
1133       PrivateScope.addPrivate(
1134           LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
1135       PrivateScope.addPrivate(
1136           RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
1137             return IsArray
1138                        ? Builder.CreateElementBitCast(
1139                              GetAddrOfLocalVar(PrivateVD),
1140                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1141                        : GetAddrOfLocalVar(PrivateVD);
1142           });
1143     }
1144     ++ILHS;
1145     ++IRHS;
1146     ++IPriv;
1147     ++Count;
1148   }
1149 }
1150 
1151 void CodeGenFunction::EmitOMPReductionClauseFinal(
1152     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1153   if (!HaveInsertPoint())
1154     return;
1155   llvm::SmallVector<const Expr *, 8> Privates;
1156   llvm::SmallVector<const Expr *, 8> LHSExprs;
1157   llvm::SmallVector<const Expr *, 8> RHSExprs;
1158   llvm::SmallVector<const Expr *, 8> ReductionOps;
1159   bool HasAtLeastOneReduction = false;
1160   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1161     HasAtLeastOneReduction = true;
1162     Privates.append(C->privates().begin(), C->privates().end());
1163     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1164     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1165     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1166   }
1167   if (HasAtLeastOneReduction) {
1168     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1169                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1170                       ReductionKind == OMPD_simd;
1171     bool SimpleReduction = ReductionKind == OMPD_simd;
1172     // Emit nowait reduction if nowait clause is present or directive is a
1173     // parallel directive (it always has implicit barrier).
1174     CGM.getOpenMPRuntime().emitReduction(
1175         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1176         {WithNowait, SimpleReduction, ReductionKind});
1177   }
1178 }
1179 
1180 static void emitPostUpdateForReductionClause(
1181     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1182     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1183   if (!CGF.HaveInsertPoint())
1184     return;
1185   llvm::BasicBlock *DoneBB = nullptr;
1186   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1187     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1188       if (!DoneBB) {
1189         if (auto *Cond = CondGen(CGF)) {
1190           // If the first post-update expression is found, emit conditional
1191           // block if it was requested.
1192           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1193           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1194           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1195           CGF.EmitBlock(ThenBB);
1196         }
1197       }
1198       CGF.EmitIgnoredExpr(PostUpdate);
1199     }
1200   }
1201   if (DoneBB)
1202     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1203 }
1204 
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'. The callback receives the current function, the
/// directive being emitted and the vector of outlined-function arguments to
/// which the bound values are appended.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
1214 
/// Emits the codegen common to all parallel-based directives: outlines the
/// region body, handles 'num_threads'/'proc_bind', locates the applicable
/// 'if' condition, gathers captured variables and finally emits the runtime
/// call that forks the parallel region.
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  // Outline the parallel region body into a separate function.
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  // Find the 'if' condition that applies to 'parallel': either an unmodified
  // 'if' clause or one with the 'parallel' name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}
1254 
/// No-op bound-parameter emitter: used for parallel regions that are not
/// combined with 'distribute' and therefore have no chunk bounds to forward.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1258 
/// Emits code for '#pragma omp parallel'.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    // 'copyin' is emitted first: it reads the master thread's values before
    // any privatization takes place.
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  // Reduction post-updates are emitted unconditionally after the region
  // (the null condition requests no guarding block).
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
1284 
1285 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1286                                       JumpDest LoopExit) {
1287   RunCleanupsScope BodyScope(*this);
1288   // Update counters values on current iteration.
1289   for (auto I : D.updates()) {
1290     EmitIgnoredExpr(I);
1291   }
1292   // Update the linear variables.
1293   // In distribute directives only loop counters may be marked as linear, no
1294   // need to generate the code for them.
1295   if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1296     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1297       for (auto *U : C->updates())
1298         EmitIgnoredExpr(U);
1299     }
1300   }
1301 
1302   // On a continue in the body, jump to the end.
1303   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1304   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1305   // Emit loop body.
1306   EmitStmt(D.getBody());
1307   // The end (updates/cleanups).
1308   EmitBlock(Continue.getBlock());
1309   BreakContinueStack.pop_back();
1310 }
1311 
/// Emits the inner loop skeleton for an OpenMP loop:
///   cond --(true)--> body ... inc --> cond, --(false)--> [cleanup -->] exit
/// \param RequiresCleanup If true, an extra block is staged so the exit path
/// can run cleanups before leaving the scope.
/// \param BodyGen Callback emitting the loop body.
/// \param PostIncGen Callback emitting extra code right after the increment.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1360 
/// Emits initializers for the captured copies of 'linear' variables of \a D,
/// plus the pre-calculated linear steps where the step is not a constant.
/// \returns true if at least one linear variable init was emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      HasLinears = true;
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init refers to another variable: initialize the private copy
        // from the original variable's current value in this region.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    // NOTE(review): cast_or_null<>/cast<> assert when the expressions are
    // non-null but not of the expected form — presumably sema guarantees
    // getCalcStep() is a BinaryOperator with a DeclRefExpr LHS; confirm.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
1395 
/// Emit final updates for variables in 'linear' clauses: for each listed
/// variable, remap the original decl to its own address and evaluate the
/// clause's 'final' expression, then run any clause post-update expression.
/// If \p CondGen produces a non-null condition, all updates are emitted
/// under a conditional block guarded by it.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      // Temporarily privatize the original variable to its real address so
      // the 'final' expression stores into the original storage.
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1433 
/// Emit alignment assumptions for every pointer listed in the 'aligned'
/// clauses of \p D. When a clause has no explicit alignment expression,
/// the target's default simd alignment for the pointee type is used.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      // The alignment expression folds to a constant (enforced upstream,
      // since the emitted scalar is cast directly to ConstantInt).
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}
1466 
/// Register private copies for the loop counters of \p S in \p LoopScope.
/// Each original counter decl is remapped to the address of its private
/// counter; when the original is otherwise visible (local, captured, or
/// global), the private counter decl is additionally remapped to the
/// original variable's address.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  // counters() and private_counters() are parallel lists; walk them together.
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original is addressable: also map the private counter decl to
      // the original's address.
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}
1498 
1499 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1500                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1501                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1502   if (!CGF.HaveInsertPoint())
1503     return;
1504   {
1505     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1506     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1507     (void)PreCondScope.Privatize();
1508     // Get initial values of real counters.
1509     for (auto I : S.inits()) {
1510       CGF.EmitIgnoredExpr(I);
1511     }
1512   }
1513   // Check that loop is executed at least one time.
1514   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1515 }
1516 
/// Emit and register private copies for variables in the 'linear' clauses
/// of \p D. Linear variables that are also simd loop control variables get
/// their private copy emitted but not registered in \p PrivateScope
/// (presumably covered by the counter privatization — see
/// EmitOMPPrivateLoopCounters).
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    // Collect canonical decls of the simd loop control variables so they
    // can be excluded from registration below.
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    // varlists() and privates() are parallel lists; walk them together.
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}
1550 
1551 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1552                                      const OMPExecutableDirective &D,
1553                                      bool IsMonotonic) {
1554   if (!CGF.HaveInsertPoint())
1555     return;
1556   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1557     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1558                                  /*ignoreResult=*/true);
1559     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1560     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1561     // In presence of finite 'safelen', it may be unsafe to mark all
1562     // the memory instructions parallel, because loop-carried
1563     // dependences of 'safelen' iterations are possible.
1564     if (!IsMonotonic)
1565       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1566   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1567     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1568                                  /*ignoreResult=*/true);
1569     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1570     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1571     // In presence of finite 'safelen', it may be unsafe to mark all
1572     // the memory instructions parallel, because loop-carried
1573     // dependences of 'safelen' iterations are possible.
1574     CGF.LoopStack.setParallel(false);
1575   }
1576 }
1577 
1578 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1579                                       bool IsMonotonic) {
1580   // Walk clauses and process safelen/lastprivate.
1581   LoopStack.setParallel(!IsMonotonic);
1582   LoopStack.setVectorizeEnable(true);
1583   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1584 }
1585 
/// Emit final-value updates for the loop counters of \p D after a simd
/// loop. Updates are emitted only for counters visible outside the
/// construct (local, captured, global, or a captured-expression decl).
/// If \p CondGen produces a non-null condition, all updates are emitted
/// inside a conditional block guarded by it.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // counters(), private_counters() and finals() are parallel lists.
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED)
        // Captured-expression decl: the destination address comes from
        // the lvalue of its init expression.
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      // Remap the original counter to the destination address so the
      // 'final' expression stores the final value there.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1631 
/// Emit the body of loop directive \p S followed by a debug stop point at
/// the directive's location. Used as the loop-body callback for the outer
/// worksharing loops.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
1638 
1639 /// Emit a helper variable and return corresponding lvalue.
1640 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1641                                const DeclRefExpr *Helper) {
1642   auto VDecl = cast<VarDecl>(Helper->getDecl());
1643   CGF.EmitVarDecl(*VDecl);
1644   return CGF.EmitLValue(Helper);
1645 }
1646 
/// Emit the body of a simd region for directive \p S: the precondition
/// check, iteration variable and iteration-count setup, privatization of
/// counters/linear/private/reduction variables, the inner loop itself, and
/// the final/lastprivate/reduction updates.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  // Combined constructs carry worksharing/taskloop bound variables that
  // must exist even in the simd part; emit them here.
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGF.EmitOMPSimdInit(S);

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    // Privatize counters and clause variables for the duration of the loop.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    CGF.EmitOMPSimdFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(
        CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(
      S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
1733 
1734 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1735   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1736     emitOMPSimdRegion(CGF, S, Action);
1737   };
1738   OMPLexicalScope Scope(*this, S, OMPD_unknown);
1739   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1740 }
1741 
/// Emit the outer dispatch loop shared by worksharing and distribute
/// directives: a condition block that either re-checks static bounds or
/// calls the runtime's dispatch-next, the inner loop (via \p CodeGenLoop),
/// the increment/next-chunk logic, and the runtime finalization call.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // Dynamic/ordered schedules: ask the runtime for the next chunk; the
    // returned flag is the loop condition.
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
1846 
1847 void CodeGenFunction::EmitOMPForOuterLoop(
1848     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1849     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1850     const OMPLoopArguments &LoopArgs,
1851     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1852   auto &RT = CGM.getOpenMPRuntime();
1853 
1854   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1855   const bool DynamicOrOrdered =
1856       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1857 
1858   assert((Ordered ||
1859           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1860                                  LoopArgs.Chunk != nullptr)) &&
1861          "static non-chunked schedule does not need outer loop");
1862 
1863   // Emit outer loop.
1864   //
1865   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1866   // When schedule(dynamic,chunk_size) is specified, the iterations are
1867   // distributed to threads in the team in chunks as the threads request them.
1868   // Each thread executes a chunk of iterations, then requests another chunk,
1869   // until no chunks remain to be distributed. Each chunk contains chunk_size
1870   // iterations, except for the last chunk to be distributed, which may have
1871   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1872   //
1873   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1874   // to threads in the team in chunks as the executing threads request them.
1875   // Each thread executes a chunk of iterations, then requests another chunk,
1876   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1877   // each chunk is proportional to the number of unassigned iterations divided
1878   // by the number of threads in the team, decreasing to 1. For a chunk_size
1879   // with value k (greater than 1), the size of each chunk is determined in the
1880   // same way, with the restriction that the chunks do not contain fewer than k
1881   // iterations (except for the last chunk to be assigned, which may have fewer
1882   // than k iterations).
1883   //
1884   // When schedule(auto) is specified, the decision regarding scheduling is
1885   // delegated to the compiler and/or runtime system. The programmer gives the
1886   // implementation the freedom to choose any possible mapping of iterations to
1887   // threads in the team.
1888   //
1889   // When schedule(runtime) is specified, the decision regarding scheduling is
1890   // deferred until run time, and the schedule and chunk size are taken from the
1891   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1892   // implementation defined
1893   //
1894   // while(__kmpc_dispatch_next(&LB, &UB)) {
1895   //   idx = LB;
1896   //   while (idx <= UB) { BODY; ++idx;
1897   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1898   //   } // inner loop
1899   // }
1900   //
1901   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1902   // When schedule(static, chunk_size) is specified, iterations are divided into
1903   // chunks of size chunk_size, and the chunks are assigned to the threads in
1904   // the team in a round-robin fashion in the order of the thread number.
1905   //
1906   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1907   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1908   //   LB = LB + ST;
1909   //   UB = UB + ST;
1910   // }
1911   //
1912 
1913   const Expr *IVExpr = S.getIterationVariable();
1914   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1915   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1916 
1917   if (DynamicOrOrdered) {
1918     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1919     llvm::Value *LBVal = DispatchBounds.first;
1920     llvm::Value *UBVal = DispatchBounds.second;
1921     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1922                                                              LoopArgs.Chunk};
1923     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1924                            IVSigned, Ordered, DipatchRTInputValues);
1925   } else {
1926     CGOpenMPRuntime::StaticRTInput StaticInit(
1927         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1928         LoopArgs.ST, LoopArgs.Chunk);
1929     RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
1930                          ScheduleKind, StaticInit);
1931   }
1932 
1933   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1934                                     const unsigned IVSize,
1935                                     const bool IVSigned) {
1936     if (Ordered) {
1937       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1938                                                             IVSigned);
1939     }
1940   };
1941 
1942   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1943                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1944   OuterLoopArgs.IncExpr = S.getInc();
1945   OuterLoopArgs.Init = S.getInit();
1946   OuterLoopArgs.Cond = S.getCond();
1947   OuterLoopArgs.NextLB = S.getNextLowerBound();
1948   OuterLoopArgs.NextUB = S.getNextUpperBound();
1949   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1950                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1951 }
1952 
/// No-op 'ordered' callback for directives without an ordered region to
/// finalize (used by the 'distribute' outer loop).
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
1955 
1956 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1957     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1958     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1959     const CodeGenLoopTy &CodeGenLoopContent) {
1960 
1961   auto &RT = CGM.getOpenMPRuntime();
1962 
1963   // Emit outer loop.
1964   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1965   // dynamic
1966   //
1967 
1968   const Expr *IVExpr = S.getIterationVariable();
1969   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1970   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1971 
1972   CGOpenMPRuntime::StaticRTInput StaticInit(
1973       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
1974       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
1975   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
1976 
1977   // for combined 'distribute' and 'for' the increment expression of distribute
1978   // is store in DistInc. For 'distribute' alone, it is in Inc.
1979   Expr *IncExpr;
1980   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1981     IncExpr = S.getDistInc();
1982   else
1983     IncExpr = S.getInc();
1984 
1985   // this routine is shared by 'omp distribute parallel for' and
1986   // 'omp distribute': select the right EUB expression depending on the
1987   // directive
1988   OMPLoopArguments OuterLoopArgs;
1989   OuterLoopArgs.LB = LoopArgs.LB;
1990   OuterLoopArgs.UB = LoopArgs.UB;
1991   OuterLoopArgs.ST = LoopArgs.ST;
1992   OuterLoopArgs.IL = LoopArgs.IL;
1993   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1994   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1995                           ? S.getCombinedEnsureUpperBound()
1996                           : S.getEnsureUpperBound();
1997   OuterLoopArgs.IncExpr = IncExpr;
1998   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1999                            ? S.getCombinedInit()
2000                            : S.getInit();
2001   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2002                            ? S.getCombinedCond()
2003                            : S.getCond();
2004   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2005                              ? S.getCombinedNextLowerBound()
2006                              : S.getNextLowerBound();
2007   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2008                              ? S.getCombinedNextUpperBound()
2009                              : S.getNextUpperBound();
2010 
2011   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2012                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
2013                    emitEmptyOrdered);
2014 }
2015 
/// Emit the lower/upper bound helper variables of the inner 'for' of a
/// combined 'distribute parallel for' and initialize them from the
/// previous ('distribute') schedule's bounds, converting to the iteration
/// variable's type. Returns the {LB, UB} lvalues.
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  // The previous bounds may have a different type than the iteration
  // variable; convert before storing.
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
2051 
2052 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2053 /// we need to use the LB and UB expressions generated by the worksharing
2054 /// code generation support, whereas in non combined situations we would
2055 /// just emit 0 and the LastIteration expression
2056 /// This function is necessary due to the difference of the LB and UB
2057 /// types for the RT emission routines for 'for_static_init' and
2058 /// 'for_dispatch_init'
2059 static std::pair<llvm::Value *, llvm::Value *>
2060 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2061                                         const OMPExecutableDirective &S,
2062                                         Address LB, Address UB) {
2063   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2064   const Expr *IVExpr = LS.getIterationVariable();
2065   // when implementing a dynamic schedule for a 'for' combined with a
2066   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2067   // is not normalized as each team only executes its own assigned
2068   // distribute chunk
2069   QualType IteratorTy = IVExpr->getType();
2070   llvm::Value *LBVal =
2071       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2072   llvm::Value *UBVal =
2073       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart());
2074   return {LBVal, UBVal};
2075 }
2076 
2077 static void emitDistributeParallelForDistributeInnerBoundParams(
2078     CodeGenFunction &CGF, const OMPExecutableDirective &S,
2079     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2080   const auto &Dir = cast<OMPLoopDirective>(S);
2081   LValue LB =
2082       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2083   auto LBCast = CGF.Builder.CreateIntCast(
2084       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2085   CapturedVars.push_back(LBCast);
2086   LValue UB =
2087       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2088 
2089   auto UBCast = CGF.Builder.CreateIntCast(
2090       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2091   CapturedVars.push_back(UBCast);
2092 }
2093 
2094 static void
2095 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2096                                  const OMPLoopDirective &S,
2097                                  CodeGenFunction::JumpDest LoopExit) {
2098   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2099                                          PrePostActionTy &) {
2100     bool HasCancel = false;
2101     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2102       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2103         HasCancel = D->hasCancel();
2104       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2105         HasCancel = D->hasCancel();
2106       else if (const auto *D =
2107                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2108         HasCancel = D->hasCancel();
2109     }
2110     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2111                                                      HasCancel);
2112     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2113                                emitDistributeParallelForInnerBounds,
2114                                emitDistributeParallelForDispatchBounds);
2115   };
2116 
2117   emitCommonOMPParallelDirective(
2118       CGF, S,
2119       isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2120       CGInlinedWorksharingLoop,
2121       emitDistributeParallelForDistributeInnerBoundParams);
2122 }
2123 
2124 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2125     const OMPDistributeParallelForDirective &S) {
2126   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2127     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2128                               S.getDistInc());
2129   };
2130   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2131   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2132 }
2133 
2134 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2135     const OMPDistributeParallelForSimdDirective &S) {
2136   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2137     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2138                               S.getDistInc());
2139   };
2140   OMPLexicalScope Scope(*this, S, OMPD_parallel);
2141   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2142 }
2143 
2144 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2145     const OMPDistributeSimdDirective &S) {
2146   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2147     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2148   };
2149   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2150   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2151 }
2152 
2153 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2154     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2155   // Emit SPMD target parallel for region as a standalone region.
2156   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2157     emitOMPSimdRegion(CGF, S, Action);
2158   };
2159   llvm::Function *Fn;
2160   llvm::Constant *Addr;
2161   // Emit target region as a standalone region.
2162   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2163       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2164   assert(Fn && Addr && "Target device function emission failed.");
2165 }
2166 
2167 void CodeGenFunction::EmitOMPTargetSimdDirective(
2168     const OMPTargetSimdDirective &S) {
2169   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2170     emitOMPSimdRegion(CGF, S, Action);
2171   };
2172   emitCommonOMPTargetDirective(*this, S, CodeGen);
2173 }
2174 
namespace {
  /// Bundles a 'schedule' clause kind together with its two optional
  /// schedule modifiers (e.g. monotonic, nonmonotonic, simd).
  struct ScheduleKindModifiersTy {
    OpenMPScheduleClauseKind Kind;   // Schedule kind (static, dynamic, ...).
    OpenMPScheduleClauseModifier M1; // First schedule modifier, if any.
    OpenMPScheduleClauseModifier M2; // Second schedule modifier, if any.
    ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                            OpenMPScheduleClauseModifier M1,
                            OpenMPScheduleClauseModifier M2)
        : Kind(Kind), M1(M1), M2(M2) {}
  };
} // namespace
2186 
/// Emits a worksharing loop (the loop of a 'for'/'for simd' directive or
/// the inner loop of a combined construct), choosing between the static
/// (for_static_init) and dynamic (dispatch) runtime protocols based on the
/// schedule clause and the presence of 'ordered'.
///
/// \param S The loop directive being emitted.
/// \param EUB Expression clamping the upper bound (UB = min(UB, GlobalUB));
///        forwarded to the outer-loop emission for dispatch schedules.
/// \param CodeGenLoopBounds Callback producing the LB/UB helper LValues.
/// \param CGDispatchBounds Callback producing the LB/UB values passed to
///        the dispatch-init runtime call.
/// \returns true if the directive has a lastprivate clause, so callers know
///          an implicit barrier is needed to flush lastprivate values.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Assigned on every path that reaches the end of the precondition scope
  // below (the only other exit is the early 'return false').
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // An 'ordered' clause with a parameter requests doacross loop support;
    // without a parameter it requires the ordered runtime protocol.
    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress());
        RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Chunked static, dynamic and guided schedules go through the
        // dispatch-based outer loop; 'monotonic' is forced for ordered loops
        // and static/unknown schedules (see OpenMP 4.5, 2.7.1).
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}
2372 
2373 /// The following two functions generate expressions for the loop lower
2374 /// and upper bounds in case of static and dynamic (dispatch) schedule
2375 /// of the associated 'for' or 'distribute' loop.
2376 static std::pair<LValue, LValue>
2377 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2378   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2379   LValue LB =
2380       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2381   LValue UB =
2382       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2383   return {LB, UB};
2384 }
2385 
2386 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2387 /// consider the lower and upper bound expressions generated by the
2388 /// worksharing loop support, but we use 0 and the iteration space size as
2389 /// constants
2390 static std::pair<llvm::Value *, llvm::Value *>
2391 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2392                           Address LB, Address UB) {
2393   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2394   const Expr *IVExpr = LS.getIterationVariable();
2395   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2396   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2397   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2398   return {LBVal, UBVal};
2399 }
2400 
2401 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2402   bool HasLastprivates = false;
2403   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2404                                           PrePostActionTy &) {
2405     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2406     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2407                                                  emitForLoopBounds,
2408                                                  emitDispatchForLoopBounds);
2409   };
2410   {
2411     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2412     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2413                                                 S.hasCancel());
2414   }
2415 
2416   // Emit an implicit barrier at the end.
2417   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2418     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2419   }
2420 }
2421 
2422 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2423   bool HasLastprivates = false;
2424   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2425                                           PrePostActionTy &) {
2426     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2427                                                  emitForLoopBounds,
2428                                                  emitDispatchForLoopBounds);
2429   };
2430   {
2431     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2432     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2433   }
2434 
2435   // Emit an implicit barrier at the end.
2436   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2437     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2438   }
2439 }
2440 
2441 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2442                                 const Twine &Name,
2443                                 llvm::Value *Init = nullptr) {
2444   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2445   if (Init)
2446     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2447   return LVal;
2448 }
2449 
/// Emits the body of a 'sections' (or 'parallel sections') construct as a
/// statically-scheduled worksharing loop over the section indices: each
/// iteration dispatches into a switch whose cases are the individual
/// sections. Handles firstprivate/private/lastprivate/reduction clauses and
/// the cancellation region.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *Stmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // CS is non-null when the construct body is a compound statement of
  // sections; otherwise the body is treated as one implicit section.
  const auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound is the index of the last section (0 for the
    // single implicit section case).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Build stack-allocated AST nodes (opaque refs, condition IV <= UB and
    // increment ++IV) so the generic inner-loop emission can be reused.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart(), true);
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single implicit section: one case for the whole captured body.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' may carry a 'cancel' directive.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2589 
2590 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2591   {
2592     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2593     EmitSections(S);
2594   }
2595   // Emit an implicit barrier at the end.
2596   if (!S.getSingleClause<OMPNowaitClause>()) {
2597     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2598                                            OMPD_sections);
2599   }
2600 }
2601 
2602 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2603   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2604     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2605   };
2606   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2607   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2608                                               S.hasCancel());
2609 }
2610 
2611 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2612   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2613   llvm::SmallVector<const Expr *, 8> DestExprs;
2614   llvm::SmallVector<const Expr *, 8> SrcExprs;
2615   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2616   // Check if there are any 'copyprivate' clauses associated with this
2617   // 'single' construct.
2618   // Build a list of copyprivate variables along with helper expressions
2619   // (<source>, <destination>, <destination>=<source> expressions)
2620   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2621     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2622     DestExprs.append(C->destination_exprs().begin(),
2623                      C->destination_exprs().end());
2624     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2625     AssignmentOps.append(C->assignment_ops().begin(),
2626                          C->assignment_ops().end());
2627   }
2628   // Emit code for 'single' region along with 'copyprivate' clauses
2629   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2630     Action.Enter(CGF);
2631     OMPPrivateScope SingleScope(CGF);
2632     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2633     CGF.EmitOMPPrivateClause(S, SingleScope);
2634     (void)SingleScope.Privatize();
2635     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2636   };
2637   {
2638     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2639     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2640                                             CopyprivateVars, DestExprs,
2641                                             SrcExprs, AssignmentOps);
2642   }
2643   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2644   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2645   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2646     CGM.getOpenMPRuntime().emitBarrierCall(
2647         *this, S.getLocStart(),
2648         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2649   }
2650 }
2651 
2652 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2653   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2654     Action.Enter(CGF);
2655     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2656   };
2657   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2658   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2659 }
2660 
2661 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2662   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2663     Action.Enter(CGF);
2664     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
2665   };
2666   Expr *Hint = nullptr;
2667   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2668     Hint = HintClause->getHint();
2669   OMPLexicalScope Scope(*this, S, OMPD_unknown);
2670   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2671                                             S.getDirectiveName().getAsString(),
2672                                             CodeGen, S.getLocStart(), Hint);
2673 }
2674 
2675 void CodeGenFunction::EmitOMPParallelForDirective(
2676     const OMPParallelForDirective &S) {
2677   // Emit directive as a combined directive that consists of two implicit
2678   // directives: 'parallel' with 'for' directive.
2679   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2680     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2681     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2682                                emitDispatchForLoopBounds);
2683   };
2684   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2685                                  emitEmptyBoundParameters);
2686 }
2687 
2688 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2689     const OMPParallelForSimdDirective &S) {
2690   // Emit directive as a combined directive that consists of two implicit
2691   // directives: 'parallel' with 'for' directive.
2692   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2693     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2694                                emitDispatchForLoopBounds);
2695   };
2696   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2697                                  emitEmptyBoundParameters);
2698 }
2699 
2700 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2701     const OMPParallelSectionsDirective &S) {
2702   // Emit directive as a combined directive that consists of two implicit
2703   // directives: 'parallel' with 'sections' directive.
2704   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2705     CGF.EmitSections(S);
2706   };
2707   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2708                                  emitEmptyBoundParameters);
2709 }
2710 
2711 void CodeGenFunction::EmitOMPTaskBasedDirective(
2712     const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
2713     const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
2714     OMPTaskDataTy &Data) {
2715   // Emit outlined function for task construct.
2716   const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
2717   auto *I = CS->getCapturedDecl()->param_begin();
2718   auto *PartId = std::next(I);
2719   auto *TaskT = std::next(I, 4);
2720   // Check if the task is final
2721   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2722     // If the condition constant folds and can be elided, try to avoid emitting
2723     // the condition and the dead arm of the if/else.
2724     auto *Cond = Clause->getCondition();
2725     bool CondConstant;
2726     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2727       Data.Final.setInt(CondConstant);
2728     else
2729       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2730   } else {
2731     // By default the task is not final.
2732     Data.Final.setInt(/*IntVal=*/false);
2733   }
2734   // Check if the task has 'priority' clause.
2735   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2736     auto *Prio = Clause->getPriority();
2737     Data.Priority.setInt(/*IntVal=*/true);
2738     Data.Priority.setPointer(EmitScalarConversion(
2739         EmitScalarExpr(Prio), Prio->getType(),
2740         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2741         Prio->getExprLoc()));
2742   }
2743   // The first function argument for tasks is a thread id, the second one is a
2744   // part id (0 for tied tasks, >=0 for untied task).
2745   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2746   // Get list of private variables.
2747   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2748     auto IRef = C->varlist_begin();
2749     for (auto *IInit : C->private_copies()) {
2750       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2751       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2752         Data.PrivateVars.push_back(*IRef);
2753         Data.PrivateCopies.push_back(IInit);
2754       }
2755       ++IRef;
2756     }
2757   }
2758   EmittedAsPrivate.clear();
2759   // Get list of firstprivate variables.
2760   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2761     auto IRef = C->varlist_begin();
2762     auto IElemInitRef = C->inits().begin();
2763     for (auto *IInit : C->private_copies()) {
2764       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2765       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2766         Data.FirstprivateVars.push_back(*IRef);
2767         Data.FirstprivateCopies.push_back(IInit);
2768         Data.FirstprivateInits.push_back(*IElemInitRef);
2769       }
2770       ++IRef;
2771       ++IElemInitRef;
2772     }
2773   }
2774   // Get list of lastprivate variables (for taskloops).
2775   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2776   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2777     auto IRef = C->varlist_begin();
2778     auto ID = C->destination_exprs().begin();
2779     for (auto *IInit : C->private_copies()) {
2780       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2781       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2782         Data.LastprivateVars.push_back(*IRef);
2783         Data.LastprivateCopies.push_back(IInit);
2784       }
2785       LastprivateDstsOrigs.insert(
2786           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2787            cast<DeclRefExpr>(*IRef)});
2788       ++IRef;
2789       ++ID;
2790     }
2791   }
2792   SmallVector<const Expr *, 4> LHSs;
2793   SmallVector<const Expr *, 4> RHSs;
2794   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2795     auto IPriv = C->privates().begin();
2796     auto IRed = C->reduction_ops().begin();
2797     auto ILHS = C->lhs_exprs().begin();
2798     auto IRHS = C->rhs_exprs().begin();
2799     for (const auto *Ref : C->varlists()) {
2800       Data.ReductionVars.emplace_back(Ref);
2801       Data.ReductionCopies.emplace_back(*IPriv);
2802       Data.ReductionOps.emplace_back(*IRed);
2803       LHSs.emplace_back(*ILHS);
2804       RHSs.emplace_back(*IRHS);
2805       std::advance(IPriv, 1);
2806       std::advance(IRed, 1);
2807       std::advance(ILHS, 1);
2808       std::advance(IRHS, 1);
2809     }
2810   }
2811   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2812       *this, S.getLocStart(), LHSs, RHSs, Data);
2813   // Build list of dependences.
2814   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2815     for (auto *IRef : C->varlists())
2816       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2817   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
2818                     CapturedRegion](CodeGenFunction &CGF,
2819                                     PrePostActionTy &Action) {
2820     // Set proper addresses for generated private copies.
2821     OMPPrivateScope Scope(CGF);
2822     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2823         !Data.LastprivateVars.empty()) {
2824       enum { PrivatesParam = 2, CopyFnParam = 3 };
2825       auto *CopyFn = CGF.Builder.CreateLoad(
2826           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2827       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2828           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2829       // Map privates.
2830       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2831       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2832       CallArgs.push_back(PrivatesPtr);
2833       for (auto *E : Data.PrivateVars) {
2834         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2835         Address PrivatePtr = CGF.CreateMemTemp(
2836             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2837         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2838         CallArgs.push_back(PrivatePtr.getPointer());
2839       }
2840       for (auto *E : Data.FirstprivateVars) {
2841         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2842         Address PrivatePtr =
2843             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2844                               ".firstpriv.ptr.addr");
2845         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2846         CallArgs.push_back(PrivatePtr.getPointer());
2847       }
2848       for (auto *E : Data.LastprivateVars) {
2849         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2850         Address PrivatePtr =
2851             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2852                               ".lastpriv.ptr.addr");
2853         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2854         CallArgs.push_back(PrivatePtr.getPointer());
2855       }
2856       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
2857                                                           CopyFn, CallArgs);
2858       for (auto &&Pair : LastprivateDstsOrigs) {
2859         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2860         DeclRefExpr DRE(
2861             const_cast<VarDecl *>(OrigVD),
2862             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2863                 OrigVD) != nullptr,
2864             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2865         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2866           return CGF.EmitLValue(&DRE).getAddress();
2867         });
2868       }
2869       for (auto &&Pair : PrivatePtrs) {
2870         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2871                             CGF.getContext().getDeclAlign(Pair.first));
2872         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2873       }
2874     }
2875     if (Data.Reductions) {
2876       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
2877       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2878                              Data.ReductionOps);
2879       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2880           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2881       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2882         RedCG.emitSharedLValue(CGF, Cnt);
2883         RedCG.emitAggregateType(CGF, Cnt);
2884         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2885             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2886         Replacement =
2887             Address(CGF.EmitScalarConversion(
2888                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2889                         CGF.getContext().getPointerType(
2890                             Data.ReductionCopies[Cnt]->getType()),
2891                         Data.ReductionCopies[Cnt]->getExprLoc()),
2892                     Replacement.getAlignment());
2893         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2894         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2895                          [Replacement]() { return Replacement; });
2896         // FIXME: This must removed once the runtime library is fixed.
2897         // Emit required threadprivate variables for
2898         // initilizer/combiner/finalizer.
2899         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2900                                                            RedCG, Cnt);
2901       }
2902     }
2903     // Privatize all private variables except for in_reduction items.
2904     (void)Scope.Privatize();
2905     SmallVector<const Expr *, 4> InRedVars;
2906     SmallVector<const Expr *, 4> InRedPrivs;
2907     SmallVector<const Expr *, 4> InRedOps;
2908     SmallVector<const Expr *, 4> TaskgroupDescriptors;
2909     for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
2910       auto IPriv = C->privates().begin();
2911       auto IRed = C->reduction_ops().begin();
2912       auto ITD = C->taskgroup_descriptors().begin();
2913       for (const auto *Ref : C->varlists()) {
2914         InRedVars.emplace_back(Ref);
2915         InRedPrivs.emplace_back(*IPriv);
2916         InRedOps.emplace_back(*IRed);
2917         TaskgroupDescriptors.emplace_back(*ITD);
2918         std::advance(IPriv, 1);
2919         std::advance(IRed, 1);
2920         std::advance(ITD, 1);
2921       }
2922     }
2923     // Privatize in_reduction items here, because taskgroup descriptors must be
2924     // privatized earlier.
2925     OMPPrivateScope InRedScope(CGF);
2926     if (!InRedVars.empty()) {
2927       ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
2928       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
2929         RedCG.emitSharedLValue(CGF, Cnt);
2930         RedCG.emitAggregateType(CGF, Cnt);
2931         // The taskgroup descriptor variable is always implicit firstprivate and
2932         // privatized already during procoessing of the firstprivates.
2933         llvm::Value *ReductionsPtr =
2934             CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
2935                                  TaskgroupDescriptors[Cnt]->getExprLoc());
2936         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2937             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2938         Replacement = Address(
2939             CGF.EmitScalarConversion(
2940                 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2941                 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
2942                 InRedPrivs[Cnt]->getExprLoc()),
2943             Replacement.getAlignment());
2944         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2945         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
2946                               [Replacement]() { return Replacement; });
2947         // FIXME: This must removed once the runtime library is fixed.
2948         // Emit required threadprivate variables for
2949         // initilizer/combiner/finalizer.
2950         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2951                                                            RedCG, Cnt);
2952       }
2953     }
2954     (void)InRedScope.Privatize();
2955 
2956     Action.Enter(CGF);
2957     BodyGen(CGF);
2958   };
2959   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2960       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2961       Data.NumberOfParts);
2962   OMPLexicalScope Scope(*this, S);
2963   TaskGen(*this, OutlinedFn, Data);
2964 }
2965 
2966 static ImplicitParamDecl *
2967 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
2968                                   QualType Ty, CapturedDecl *CD,
2969                                   SourceLocation Loc) {
2970   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2971                                            ImplicitParamDecl::Other);
2972   auto *OrigRef = DeclRefExpr::Create(
2973       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
2974       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
2975   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
2976                                               ImplicitParamDecl::Other);
2977   auto *PrivateRef = DeclRefExpr::Create(
2978       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
2979       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
2980   QualType ElemType = C.getBaseElementType(Ty);
2981   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
2982                                            ImplicitParamDecl::Other);
2983   auto *InitRef = DeclRefExpr::Create(
2984       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
2985       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
2986   PrivateVD->setInitStyle(VarDecl::CInit);
2987   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
2988                                               InitRef, /*BasePath=*/nullptr,
2989                                               VK_RValue));
2990   Data.FirstprivateVars.emplace_back(OrigRef);
2991   Data.FirstprivateCopies.emplace_back(PrivateRef);
2992   Data.FirstprivateInits.emplace_back(InitRef);
2993   return OrigVD;
2994 }
2995 
2996 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
2997     const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
2998     OMPTargetDataInfo &InputInfo) {
2999   // Emit outlined function for task construct.
3000   auto CS = S.getCapturedStmt(OMPD_task);
3001   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3002   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3003   auto *I = CS->getCapturedDecl()->param_begin();
3004   auto *PartId = std::next(I);
3005   auto *TaskT = std::next(I, 4);
3006   OMPTaskDataTy Data;
3007   // The task is not final.
3008   Data.Final.setInt(/*IntVal=*/false);
3009   // Get list of firstprivate variables.
3010   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3011     auto IRef = C->varlist_begin();
3012     auto IElemInitRef = C->inits().begin();
3013     for (auto *IInit : C->private_copies()) {
3014       Data.FirstprivateVars.push_back(*IRef);
3015       Data.FirstprivateCopies.push_back(IInit);
3016       Data.FirstprivateInits.push_back(*IElemInitRef);
3017       ++IRef;
3018       ++IElemInitRef;
3019     }
3020   }
3021   OMPPrivateScope TargetScope(*this);
3022   VarDecl *BPVD = nullptr;
3023   VarDecl *PVD = nullptr;
3024   VarDecl *SVD = nullptr;
3025   if (InputInfo.NumberOfTargetItems > 0) {
3026     auto *CD = CapturedDecl::Create(
3027         getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
3028     llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
3029     QualType BaseAndPointersType = getContext().getConstantArrayType(
3030         getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
3031         /*IndexTypeQuals=*/0);
3032     BPVD = createImplicitFirstprivateForType(
3033         getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
3034     PVD = createImplicitFirstprivateForType(
3035         getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
3036     QualType SizesType = getContext().getConstantArrayType(
3037         getContext().getSizeType(), ArrSize, ArrayType::Normal,
3038         /*IndexTypeQuals=*/0);
3039     SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
3040                                             S.getLocStart());
3041     TargetScope.addPrivate(
3042         BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
3043     TargetScope.addPrivate(PVD,
3044                            [&InputInfo]() { return InputInfo.PointersArray; });
3045     TargetScope.addPrivate(SVD,
3046                            [&InputInfo]() { return InputInfo.SizesArray; });
3047   }
3048   (void)TargetScope.Privatize();
3049   // Build list of dependences.
3050   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
3051     for (auto *IRef : C->varlists())
3052       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
3053   auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
3054                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
3055     // Set proper addresses for generated private copies.
3056     OMPPrivateScope Scope(CGF);
3057     if (!Data.FirstprivateVars.empty()) {
3058       enum { PrivatesParam = 2, CopyFnParam = 3 };
3059       auto *CopyFn = CGF.Builder.CreateLoad(
3060           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
3061       auto *PrivatesPtr = CGF.Builder.CreateLoad(
3062           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
3063       // Map privates.
3064       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3065       llvm::SmallVector<llvm::Value *, 16> CallArgs;
3066       CallArgs.push_back(PrivatesPtr);
3067       for (auto *E : Data.FirstprivateVars) {
3068         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3069         Address PrivatePtr =
3070             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3071                               ".firstpriv.ptr.addr");
3072         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
3073         CallArgs.push_back(PrivatePtr.getPointer());
3074       }
3075       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
3076                                                           CopyFn, CallArgs);
3077       for (auto &&Pair : PrivatePtrs) {
3078         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3079                             CGF.getContext().getDeclAlign(Pair.first));
3080         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
3081       }
3082     }
3083     // Privatize all private variables except for in_reduction items.
3084     (void)Scope.Privatize();
3085     if (InputInfo.NumberOfTargetItems > 0) {
3086       InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
3087           CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
3088       InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
3089           CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
3090       InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
3091           CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
3092     }
3093 
3094     Action.Enter(CGF);
3095     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
3096     BodyGen(CGF);
3097   };
3098   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
3099       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
3100       Data.NumberOfParts);
3101   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
3102   IntegerLiteral IfCond(getContext(), TrueOrFalse,
3103                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3104                         SourceLocation());
3105 
3106   CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn,
3107                                       SharedsTy, CapturedStruct, &IfCond, Data);
3108 }
3109 
3110 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
3111   // Emit outlined function for task construct.
3112   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
3113   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3114   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3115   const Expr *IfCond = nullptr;
3116   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3117     if (C->getNameModifier() == OMPD_unknown ||
3118         C->getNameModifier() == OMPD_task) {
3119       IfCond = C->getCondition();
3120       break;
3121     }
3122   }
3123 
3124   OMPTaskDataTy Data;
3125   // Check if we should emit tied or untied task.
3126   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
3127   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3128     CGF.EmitStmt(CS->getCapturedStmt());
3129   };
3130   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
3131                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
3132                             const OMPTaskDataTy &Data) {
3133     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
3134                                             SharedsTy, CapturedStruct, IfCond,
3135                                             Data);
3136   };
3137   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
3138 }
3139 
3140 void CodeGenFunction::EmitOMPTaskyieldDirective(
3141     const OMPTaskyieldDirective &S) {
3142   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
3143 }
3144 
3145 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
3146   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
3147 }
3148 
3149 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
3150   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
3151 }
3152 
3153 void CodeGenFunction::EmitOMPTaskgroupDirective(
3154     const OMPTaskgroupDirective &S) {
3155   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3156     Action.Enter(CGF);
3157     if (const Expr *E = S.getReductionRef()) {
3158       SmallVector<const Expr *, 4> LHSs;
3159       SmallVector<const Expr *, 4> RHSs;
3160       OMPTaskDataTy Data;
3161       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
3162         auto IPriv = C->privates().begin();
3163         auto IRed = C->reduction_ops().begin();
3164         auto ILHS = C->lhs_exprs().begin();
3165         auto IRHS = C->rhs_exprs().begin();
3166         for (const auto *Ref : C->varlists()) {
3167           Data.ReductionVars.emplace_back(Ref);
3168           Data.ReductionCopies.emplace_back(*IPriv);
3169           Data.ReductionOps.emplace_back(*IRed);
3170           LHSs.emplace_back(*ILHS);
3171           RHSs.emplace_back(*IRHS);
3172           std::advance(IPriv, 1);
3173           std::advance(IRed, 1);
3174           std::advance(ILHS, 1);
3175           std::advance(IRHS, 1);
3176         }
3177       }
3178       llvm::Value *ReductionDesc =
3179           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
3180                                                            LHSs, RHSs, Data);
3181       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3182       CGF.EmitVarDecl(*VD);
3183       CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
3184                             /*Volatile=*/false, E->getType());
3185     }
3186     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3187   };
3188   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3189   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3190 }
3191 
3192 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3193   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3194     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3195       return llvm::makeArrayRef(FlushClause->varlist_begin(),
3196                                 FlushClause->varlist_end());
3197     }
3198     return llvm::None;
3199   }(), S.getLocStart());
3200 }
3201 
3202 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3203                                             const CodeGenLoopTy &CodeGenLoop,
3204                                             Expr *IncExpr) {
3205   // Emit the loop iteration variable.
3206   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3207   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3208   EmitVarDecl(*IVDecl);
3209 
3210   // Emit the iterations count variable.
3211   // If it is not a variable, Sema decided to calculate iterations count on each
3212   // iteration (e.g., it is foldable into a constant).
3213   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3214     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3215     // Emit calculation of the iterations count.
3216     EmitIgnoredExpr(S.getCalcLastIteration());
3217   }
3218 
3219   auto &RT = CGM.getOpenMPRuntime();
3220 
3221   bool HasLastprivateClause = false;
3222   // Check pre-condition.
3223   {
3224     OMPLoopScope PreInitScope(*this, S);
3225     // Skip the entire loop if we don't meet the precondition.
3226     // If the condition constant folds and can be elided, avoid emitting the
3227     // whole loop.
3228     bool CondConstant;
3229     llvm::BasicBlock *ContBlock = nullptr;
3230     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3231       if (!CondConstant)
3232         return;
3233     } else {
3234       auto *ThenBlock = createBasicBlock("omp.precond.then");
3235       ContBlock = createBasicBlock("omp.precond.end");
3236       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3237                   getProfileCount(&S));
3238       EmitBlock(ThenBlock);
3239       incrementProfileCounter(&S);
3240     }
3241 
3242     emitAlignedClause(*this, S);
3243     // Emit 'then' code.
3244     {
3245       // Emit helper vars inits.
3246 
3247       LValue LB = EmitOMPHelperVar(
3248           *this, cast<DeclRefExpr>(
3249                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3250                           ? S.getCombinedLowerBoundVariable()
3251                           : S.getLowerBoundVariable())));
3252       LValue UB = EmitOMPHelperVar(
3253           *this, cast<DeclRefExpr>(
3254                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3255                           ? S.getCombinedUpperBoundVariable()
3256                           : S.getUpperBoundVariable())));
3257       LValue ST =
3258           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3259       LValue IL =
3260           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3261 
3262       OMPPrivateScope LoopScope(*this);
3263       if (EmitOMPFirstprivateClause(S, LoopScope)) {
3264         // Emit implicit barrier to synchronize threads and avoid data races
3265         // on initialization of firstprivate variables and post-update of
3266         // lastprivate variables.
3267         CGM.getOpenMPRuntime().emitBarrierCall(
3268             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3269             /*ForceSimpleCall=*/true);
3270       }
3271       EmitOMPPrivateClause(S, LoopScope);
3272       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
3273           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
3274           !isOpenMPTeamsDirective(S.getDirectiveKind()))
3275         EmitOMPReductionClauseInit(S, LoopScope);
3276       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3277       EmitOMPPrivateLoopCounters(S, LoopScope);
3278       (void)LoopScope.Privatize();
3279 
3280       // Detect the distribute schedule kind and chunk.
3281       llvm::Value *Chunk = nullptr;
3282       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3283       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3284         ScheduleKind = C->getDistScheduleKind();
3285         if (const auto *Ch = C->getChunkSize()) {
3286           Chunk = EmitScalarExpr(Ch);
3287           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3288                                        S.getIterationVariable()->getType(),
3289                                        S.getLocStart());
3290         }
3291       }
3292       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3293       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3294 
3295       // OpenMP [2.10.8, distribute Construct, Description]
3296       // If dist_schedule is specified, kind must be static. If specified,
3297       // iterations are divided into chunks of size chunk_size, chunks are
3298       // assigned to the teams of the league in a round-robin fashion in the
3299       // order of the team number. When no chunk_size is specified, the
3300       // iteration space is divided into chunks that are approximately equal
3301       // in size, and at most one chunk is distributed to each team of the
3302       // league. The size of the chunks is unspecified in this case.
3303       if (RT.isStaticNonchunked(ScheduleKind,
3304                                 /* Chunked */ Chunk != nullptr)) {
3305         if (isOpenMPSimdDirective(S.getDirectiveKind()))
3306           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
3307         CGOpenMPRuntime::StaticRTInput StaticInit(
3308             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
3309             LB.getAddress(), UB.getAddress(), ST.getAddress());
3310         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3311                                     StaticInit);
3312         auto LoopExit =
3313             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3314         // UB = min(UB, GlobalUB);
3315         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3316                             ? S.getCombinedEnsureUpperBound()
3317                             : S.getEnsureUpperBound());
3318         // IV = LB;
3319         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3320                             ? S.getCombinedInit()
3321                             : S.getInit());
3322 
3323         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3324                          ? S.getCombinedCond()
3325                          : S.getCond();
3326 
3327         // for distribute alone,  codegen
3328         // while (idx <= UB) { BODY; ++idx; }
3329         // when combined with 'for' (e.g. as in 'distribute parallel for')
3330         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3331         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3332                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3333                            CodeGenLoop(CGF, S, LoopExit);
3334                          },
3335                          [](CodeGenFunction &) {});
3336         EmitBlock(LoopExit.getBlock());
3337         // Tell the runtime we are done.
3338         RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
3339       } else {
3340         // Emit the outer loop, which requests its work chunk [LB..UB] from
3341         // runtime and runs the inner loop to process it.
3342         const OMPLoopArguments LoopArguments = {
3343             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3344             Chunk};
3345         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3346                                    CodeGenLoop);
3347       }
3348       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3349         EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3350           return CGF.Builder.CreateIsNotNull(
3351               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3352         });
3353       }
3354       OpenMPDirectiveKind ReductionKind = OMPD_unknown;
3355       if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
3356           isOpenMPSimdDirective(S.getDirectiveKind())) {
3357         ReductionKind = OMPD_parallel_for_simd;
3358       } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3359         ReductionKind = OMPD_parallel_for;
3360       } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3361         ReductionKind = OMPD_simd;
3362       } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
3363                  S.hasClausesOfKind<OMPReductionClause>()) {
3364         llvm_unreachable(
3365             "No reduction clauses is allowed in distribute directive.");
3366       }
3367       EmitOMPReductionClauseFinal(S, ReductionKind);
3368       // Emit post-update of the reduction variables if IsLastIter != 0.
3369       emitPostUpdateForReductionClause(
3370           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
3371             return CGF.Builder.CreateIsNotNull(
3372                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
3373           });
3374       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3375       if (HasLastprivateClause) {
3376         EmitOMPLastprivateClauseFinal(
3377             S, /*NoFinals=*/false,
3378             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
3379       }
3380     }
3381 
3382     // We're now done with the loop, so jump to the continuation block.
3383     if (ContBlock) {
3384       EmitBranch(ContBlock);
3385       EmitBlock(ContBlock, true);
3386     }
3387   }
3388 }
3389 
3390 void CodeGenFunction::EmitOMPDistributeDirective(
3391     const OMPDistributeDirective &S) {
3392   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3393 
3394     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3395   };
3396   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3397   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3398 }
3399 
3400 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3401                                                    const CapturedStmt *S) {
3402   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3403   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3404   CGF.CapturedStmtInfo = &CapStmtInfo;
3405   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3406   Fn->addFnAttr(llvm::Attribute::NoInline);
3407   return Fn;
3408 }
3409 
/// Emit an '#pragma omp ordered' directive. Handles three forms: 'ordered
/// depend(...)' (doacross synchronization, no associated statement),
/// 'ordered simd' (body outlined into a noinline function), and the plain
/// threads-ordered region.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    // 'ordered depend(...)': only emit the doacross runtime calls, one per
    // depend clause; there is no statement to emit.
    assert(!S.getAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      // 'ordered simd': outline the captured body (marked noinline by
      // emitOutlinedOrderedFunction) and call it with the captured values.
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                      OutlinedFn, CapturedVars);
    } else {
      // Plain ordered region: emit the body inline.
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  // The last argument (!C) is true only when no 'simd' clause is present.
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
3436 
3437 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3438                                          QualType SrcType, QualType DestType,
3439                                          SourceLocation Loc) {
3440   assert(CGF.hasScalarEvaluationKind(DestType) &&
3441          "DestType must have scalar evaluation kind.");
3442   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3443   return Val.isScalar()
3444              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3445                                         Loc)
3446              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3447                                                  DestType, Loc);
3448 }
3449 
3450 static CodeGenFunction::ComplexPairTy
3451 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3452                       QualType DestType, SourceLocation Loc) {
3453   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3454          "DestType must have complex evaluation kind.");
3455   CodeGenFunction::ComplexPairTy ComplexVal;
3456   if (Val.isScalar()) {
3457     // Convert the input element to the element type of the complex.
3458     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3459     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3460                                               DestElementType, Loc);
3461     ComplexVal = CodeGenFunction::ComplexPairTy(
3462         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3463   } else {
3464     assert(Val.isComplex() && "Must be a scalar or complex.");
3465     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3466     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3467     ComplexVal.first = CGF.EmitScalarConversion(
3468         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3469     ComplexVal.second = CGF.EmitScalarConversion(
3470         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3471   }
3472   return ComplexVal;
3473 }
3474 
3475 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3476                                   LValue LVal, RValue RVal) {
3477   if (LVal.isGlobalReg()) {
3478     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3479   } else {
3480     CGF.EmitAtomicStore(RVal, LVal,
3481                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3482                                  : llvm::AtomicOrdering::Monotonic,
3483                         LVal.isVolatile(), /*IsInit=*/false);
3484   }
3485 }
3486 
/// Store \p RVal (whose source type is \p RValTy) into \p LVal, converting
/// it to LVal's type first. Only scalar and complex destinations are
/// supported; aggregates are unreachable here.
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    // Convert (scalar or complex) -> scalar, then store.
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    // Convert (scalar or complex) -> complex, then store both components.
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
3504 
3505 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3506                                   const Expr *X, const Expr *V,
3507                                   SourceLocation Loc) {
3508   // v = x;
3509   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3510   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3511   LValue XLValue = CGF.EmitLValue(X);
3512   LValue VLValue = CGF.EmitLValue(V);
3513   RValue Res = XLValue.isGlobalReg()
3514                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3515                    : CGF.EmitAtomicLoad(
3516                          XLValue, Loc,
3517                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3518                                   : llvm::AtomicOrdering::Monotonic,
3519                          XLValue.isVolatile());
3520   // OpenMP, 2.12.6, atomic Construct
3521   // Any atomic construct with a seq_cst clause forces the atomically
3522   // performed operation to include an implicit flush operation without a
3523   // list.
3524   if (IsSeqCst)
3525     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3526   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3527 }
3528 
3529 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3530                                    const Expr *X, const Expr *E,
3531                                    SourceLocation Loc) {
3532   // x = expr;
3533   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3534   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3535   // OpenMP, 2.12.6, atomic Construct
3536   // Any atomic construct with a seq_cst clause forces the atomically
3537   // performed operation to include an implicit flush operation without a
3538   // list.
3539   if (IsSeqCst)
3540     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3541 }
3542 
/// Try to lower the atomic update of \p X with \p Update via binary operator
/// \p BO as a single 'atomicrmw' instruction. Returns {true, old value of X}
/// on success, {false, null RValue} when the operation cannot be expressed
/// this way and the caller must fall back to a compare-and-swap loop.
/// \p IsXLHSInRHSPart is true for 'x = x binop expr' forms, false for
/// 'x = expr binop x'.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // 'x = expr - x' is not expressible as 'atomicrmw sub'; bail out.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  // 'x = x < e ? x : e' style min/max: which of min/max applies depends on
  // the signedness of 'x' and on which side of the comparison 'x' sits.
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    // Plain assignment maps to an atomic exchange.
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // Operations with no atomicrmw equivalent: caller falls back to CAS.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // Operators that can never appear as the update op of an atomic construct.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    // Constant operands may have a narrower/wider type than 'x'; cast them
    // to the element type of 'x' with the matching signedness.
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
3635 
/// Emit an atomic update of lvalue \p X. First attempts a single 'atomicrmw'
/// (emitOMPAtomicRMW); on failure, falls back to either a plain
/// load-update-store for global register lvalues or a compare-and-swap loop
/// driven by \p CommonGen. Returns emitOMPAtomicRMW's result pair, so
/// Res.second is only meaningful when Res.first is true.
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}
3659 
/// Emit 'omp atomic update': atomically apply the update expression \p UE
/// (a binary operator over opaque placeholders for 'x' and 'expr') to
/// lvalue \p X.
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // UE's operands are OpaqueValueExprs; figure out which one stands for 'x'
  // and which for 'expr' based on which side 'x' appears on.
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Generator used by the CAS fallback: bind the opaque placeholders to the
  // already-computed 'expr' value and the current value of 'x', then emit UE.
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3697 
3698 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3699                             QualType SourceType, QualType ResType,
3700                             SourceLocation Loc) {
3701   switch (CGF.getEvaluationKind(ResType)) {
3702   case TEK_Scalar:
3703     return RValue::get(
3704         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3705   case TEK_Complex: {
3706     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3707     return RValue::getComplex(Res.first, Res.second);
3708   }
3709   case TEK_Aggregate:
3710     break;
3711   }
3712   llvm_unreachable("Must be a scalar or complex.");
3713 }
3714 
/// Emit 'omp atomic capture': atomically update 'x' (via update expression
/// \p UE, or plain assignment of 'expr' when UE is null) and store into 'v'
/// either the old value of 'x' (\p IsPostfixUpdate) or the new one.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value that will finally be stored into 'v'.
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    // UE's operands are opaque placeholders; identify which stands for 'x'
    // and which for 'expr' based on which side 'x' appears on.
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // CAS-fallback generator: evaluate UE with the placeholders bound, and
    // record old vs. new value for 'v' depending on prefix/postfix form.
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // CAS-fallback generator: 'v' captures the old value of 'x'; the stored
    // value is always 'expr'.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3795 
/// Dispatch emission of an 'omp atomic' construct based on the clause kind
/// determining its form (read/write/update/capture). OMPC_unknown (a plain
/// '#pragma omp atomic') is treated as 'update'.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  // None of the remaining clause kinds may determine the form of an 'atomic'
  // construct; reaching them here is a bug.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
3865 
3866 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3867   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3868   OpenMPClauseKind Kind = OMPC_unknown;
3869   for (auto *C : S.clauses()) {
3870     // Find first clause (skip seq_cst clause, if it is first).
3871     if (C->getClauseKind() != OMPC_seq_cst) {
3872       Kind = C->getClauseKind();
3873       break;
3874     }
3875   }
3876 
3877   const auto *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
3878   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3879     enterFullExpression(EWC);
3880   }
3881   // Processing for statements under 'atomic capture'.
3882   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3883     for (const auto *C : Compound->body()) {
3884       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3885         enterFullExpression(EWC);
3886       }
3887     }
3888   }
3889 
3890   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3891                                             PrePostActionTy &) {
3892     CGF.EmitStopPoint(CS);
3893     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3894                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3895                       S.isXLHSInRHSPart(), S.getLocStart());
3896   };
3897   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3898   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3899 }
3900 
/// Common host-side emission for all target execution directives: outline
/// the region as a device function and emit the runtime call that launches
/// it, honoring any 'if' and 'device' clauses.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // Outlined function and its ID, filled in by emitTargetOutlinedFunction.
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}
3956 
3957 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3958                              PrePostActionTy &Action) {
3959   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3960   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3961   CGF.EmitOMPPrivateClause(S, PrivateScope);
3962   (void)PrivateScope.Privatize();
3963 
3964   Action.Enter(CGF);
3965   CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
3966 }
3967 
3968 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3969                                                   StringRef ParentName,
3970                                                   const OMPTargetDirective &S) {
3971   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3972     emitTargetRegion(CGF, S, Action);
3973   };
3974   llvm::Function *Fn;
3975   llvm::Constant *Addr;
3976   // Emit target region as a standalone region.
3977   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3978       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3979   assert(Fn && Addr && "Target device function emission failed.");
3980 }
3981 
3982 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3983   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3984     emitTargetRegion(CGF, S, Action);
3985   };
3986   emitCommonOMPTargetDirective(*this, S, CodeGen);
3987 }
3988 
/// Common emission for teams-based directives: outline the region, emit the
/// num_teams/thread_limit runtime clause if present, then emit the teams
/// runtime call with the captured variables.
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    // Either clause may be absent; the runtime accepts nullptr for the
    // missing one.
    Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}
4013 
/// Emit an '#pragma omp teams' directive: privatize, emit the body, and
/// finalize/post-update reductions.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  // NOTE(review): InnermostKind is OMPD_distribute here, while the sibling
  // emitTargetTeamsRegion passes OMPD_teams — confirm this asymmetry is
  // intentional.
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  // Reduction post-update runs unconditionally (no IsLastIter check here).
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
4029 
/// Emit the teams region nested inside a 'target teams' directive.
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  // Enter the outer (target) action before building the nested teams region.
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // This Action belongs to the inner teams region, not the outer one.
    Action.Enter(CGF);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  // Reduction post-update runs unconditionally (no IsLastIter check here).
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
4049 
4050 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
4051     CodeGenModule &CGM, StringRef ParentName,
4052     const OMPTargetTeamsDirective &S) {
4053   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4054     emitTargetTeamsRegion(CGF, Action, S);
4055   };
4056   llvm::Function *Fn;
4057   llvm::Constant *Addr;
4058   // Emit target region as a standalone region.
4059   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4060       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4061   assert(Fn && Addr && "Target device function emission failed.");
4062 }
4063 
4064 void CodeGenFunction::EmitOMPTargetTeamsDirective(
4065     const OMPTargetTeamsDirective &S) {
4066   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4067     emitTargetTeamsRegion(CGF, Action, S);
4068   };
4069   emitCommonOMPTargetDirective(*this, S, CodeGen);
4070 }
4071 
/// Emit the 'teams distribute' region of a combined
/// 'target teams distribute' directive.
static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  // Run the pre-action before the teams region is emitted.
  Action.Enter(CGF);
  // Codegen for the nested 'distribute' loop.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // The distribute loop is emitted as an inlined 'distribute' directive
    // inside the teams region.
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  // Emit post-update expressions for reduction clauses, if any.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
4094 
4095 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
4096     CodeGenModule &CGM, StringRef ParentName,
4097     const OMPTargetTeamsDistributeDirective &S) {
4098   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4099     emitTargetTeamsDistributeRegion(CGF, Action, S);
4100   };
4101   llvm::Function *Fn;
4102   llvm::Constant *Addr;
4103   // Emit target region as a standalone region.
4104   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4105       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4106   assert(Fn && Addr && "Target device function emission failed.");
4107 }
4108 
4109 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
4110     const OMPTargetTeamsDistributeDirective &S) {
4111   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4112     emitTargetTeamsDistributeRegion(CGF, Action, S);
4113   };
4114   emitCommonOMPTargetDirective(*this, S, CodeGen);
4115 }
4116 
/// Emit the 'teams distribute simd' region of a combined
/// 'target teams distribute simd' directive.
static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  // Run the pre-action before the teams region is emitted.
  Action.Enter(CGF);
  // Codegen for the nested 'distribute' loop.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // The distribute loop is emitted as an inlined 'distribute' directive
    // inside the teams region.
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  // Emit post-update expressions for reduction clauses, if any.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
4139 
4140 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
4141     CodeGenModule &CGM, StringRef ParentName,
4142     const OMPTargetTeamsDistributeSimdDirective &S) {
4143   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4144     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4145   };
4146   llvm::Function *Fn;
4147   llvm::Constant *Addr;
4148   // Emit target region as a standalone region.
4149   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4150       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4151   assert(Fn && Addr && "Target device function emission failed.");
4152 }
4153 
4154 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
4155     const OMPTargetTeamsDistributeSimdDirective &S) {
4156   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4157     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
4158   };
4159   emitCommonOMPTargetDirective(*this, S, CodeGen);
4160 }
4161 
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  // Codegen for the nested 'distribute' loop.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // The distribute loop is emitted as an inlined 'distribute' directive
    // inside the teams region.
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  // Emit post-update expressions for reduction clauses, if any.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
4183 
void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  // Codegen for the nested 'distribute' loop.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // The loop is emitted as an inlined 'simd' region inside the teams region.
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  // Emit post-update expressions for reduction clauses, if any.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
4204 
void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  // Codegen for the 'distribute' loop; the inner 'parallel for' part is
  // emitted within the distribute loop via emitInnerParallelForWhenCombined.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // The distribute loop is emitted as an inlined 'distribute' directive
    // inside the teams region.
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  // Emit post-update expressions for reduction clauses, if any.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
4226 
void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  // Codegen for the 'distribute' loop; the inner 'parallel for' part is
  // emitted within the distribute loop via emitInnerParallelForWhenCombined.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // The distribute loop is emitted as an inlined 'distribute' directive
    // inside the teams region; 'cancel' is not supported here.
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  // Emit post-update expressions for reduction clauses, if any.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
4248 
4249 static void emitTargetTeamsDistributeParallelForRegion(
4250     CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
4251     PrePostActionTy &Action) {
4252   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4253     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4254                               S.getDistInc());
4255   };
4256 
4257   // Emit teams region as a standalone region.
4258   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4259                                                  PrePostActionTy &) {
4260     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4261     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4262     (void)PrivateScope.Privatize();
4263     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4264         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4265     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4266   };
4267 
4268   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
4269                               CodeGenTeams);
4270   emitPostUpdateForReductionClause(CGF, S,
4271                                    [](CodeGenFunction &) { return nullptr; });
4272 }
4273 
4274 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
4275     CodeGenModule &CGM, StringRef ParentName,
4276     const OMPTargetTeamsDistributeParallelForDirective &S) {
4277   // Emit SPMD target teams distribute parallel for region as a standalone
4278   // region.
4279   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4280     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4281   };
4282   llvm::Function *Fn;
4283   llvm::Constant *Addr;
4284   // Emit target region as a standalone region.
4285   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4286       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4287   assert(Fn && Addr && "Target device function emission failed.");
4288 }
4289 
4290 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
4291     const OMPTargetTeamsDistributeParallelForDirective &S) {
4292   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4293     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
4294   };
4295   emitCommonOMPTargetDirective(*this, S, CodeGen);
4296 }
4297 
4298 static void emitTargetTeamsDistributeParallelForSimdRegion(
4299     CodeGenFunction &CGF,
4300     const OMPTargetTeamsDistributeParallelForSimdDirective &S,
4301     PrePostActionTy &Action) {
4302   auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4303     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
4304                               S.getDistInc());
4305   };
4306 
4307   // Emit teams region as a standalone region.
4308   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
4309                                                  PrePostActionTy &) {
4310     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4311     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4312     (void)PrivateScope.Privatize();
4313     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
4314         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
4315     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4316   };
4317 
4318   emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
4319                               CodeGenTeams);
4320   emitPostUpdateForReductionClause(CGF, S,
4321                                    [](CodeGenFunction &) { return nullptr; });
4322 }
4323 
4324 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
4325     CodeGenModule &CGM, StringRef ParentName,
4326     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4327   // Emit SPMD target teams distribute parallel for simd region as a standalone
4328   // region.
4329   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4330     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4331   };
4332   llvm::Function *Fn;
4333   llvm::Constant *Addr;
4334   // Emit target region as a standalone region.
4335   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4336       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4337   assert(Fn && Addr && "Target device function emission failed.");
4338 }
4339 
4340 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
4341     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
4342   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4343     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
4344   };
4345   emitCommonOMPTargetDirective(*this, S, CodeGen);
4346 }
4347 
4348 void CodeGenFunction::EmitOMPCancellationPointDirective(
4349     const OMPCancellationPointDirective &S) {
4350   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
4351                                                    S.getCancelRegion());
4352 }
4353 
4354 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
4355   const Expr *IfCond = nullptr;
4356   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4357     if (C->getNameModifier() == OMPD_unknown ||
4358         C->getNameModifier() == OMPD_cancel) {
4359       IfCond = C->getCondition();
4360       break;
4361     }
4362   }
4363   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
4364                                         S.getCancelRegion());
4365 }
4366 
4367 CodeGenFunction::JumpDest
4368 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
4369   if (Kind == OMPD_parallel || Kind == OMPD_task ||
4370       Kind == OMPD_target_parallel)
4371     return ReturnBlock;
4372   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
4373          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
4374          Kind == OMPD_distribute_parallel_for ||
4375          Kind == OMPD_target_parallel_for ||
4376          Kind == OMPD_teams_distribute_parallel_for ||
4377          Kind == OMPD_target_teams_distribute_parallel_for);
4378   return OMPCancelStack.getExitBlock();
4379 }
4380 
/// Privatize the variables of a 'use_device_ptr' clause: each original
/// variable is replaced by a private copy initialized with the device address
/// looked up in \p CaptureDeviceAddrMap.
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  // Walk the clause's original variables, init expressions and private copies
  // in lockstep.
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it is initialized from the init
      // variable we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable reached its purpose in the emission of
      // the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
4442 
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  // Request device-pointer info from the runtime so use_device_ptr copies can
  // be initialized from it.
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emit the body of the construct.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
4531 
4532 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4533     const OMPTargetEnterDataDirective &S) {
4534   // If we don't have target devices, don't bother emitting the data mapping
4535   // code.
4536   if (CGM.getLangOpts().OMPTargetTriples.empty())
4537     return;
4538 
4539   // Check if we have any if clause associated with the directive.
4540   const Expr *IfCond = nullptr;
4541   if (auto *C = S.getSingleClause<OMPIfClause>())
4542     IfCond = C->getCondition();
4543 
4544   // Check if we have any device clause associated with the directive.
4545   const Expr *Device = nullptr;
4546   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4547     Device = C->getDevice();
4548 
4549   OMPLexicalScope Scope(*this, S, OMPD_task);
4550   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4551 }
4552 
4553 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4554     const OMPTargetExitDataDirective &S) {
4555   // If we don't have target devices, don't bother emitting the data mapping
4556   // code.
4557   if (CGM.getLangOpts().OMPTargetTriples.empty())
4558     return;
4559 
4560   // Check if we have any if clause associated with the directive.
4561   const Expr *IfCond = nullptr;
4562   if (auto *C = S.getSingleClause<OMPIfClause>())
4563     IfCond = C->getCondition();
4564 
4565   // Check if we have any device clause associated with the directive.
4566   const Expr *Device = nullptr;
4567   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4568     Device = C->getDevice();
4569 
4570   OMPLexicalScope Scope(*this, S, OMPD_task);
4571   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4572 }
4573 
/// Emit the 'parallel' region of a combined 'target parallel' directive.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  // Run the pre-action before the parallel region is emitted.
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Privatize firstprivate/private/reduction variables before emitting the
    // captured statement.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  // Emit post-update expressions for reduction clauses, if any.
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
4595 
4596 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4597     CodeGenModule &CGM, StringRef ParentName,
4598     const OMPTargetParallelDirective &S) {
4599   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4600     emitTargetParallelRegion(CGF, S, Action);
4601   };
4602   llvm::Function *Fn;
4603   llvm::Constant *Addr;
4604   // Emit target region as a standalone region.
4605   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4606       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4607   assert(Fn && Addr && "Target device function emission failed.");
4608 }
4609 
4610 void CodeGenFunction::EmitOMPTargetParallelDirective(
4611     const OMPTargetParallelDirective &S) {
4612   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4613     emitTargetParallelRegion(CGF, S, Action);
4614   };
4615   emitCommonOMPTargetDirective(*this, S, CodeGen);
4616 }
4617 
/// Emit the 'parallel for' part of a combined 'target parallel for' directive.
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  // Run the pre-action before the region is emitted.
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    // Track a cancellation exit point while the worksharing loop is emitted,
    // so a 'cancel for' inside the loop has somewhere to branch to.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}
4633 
4634 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
4635     CodeGenModule &CGM, StringRef ParentName,
4636     const OMPTargetParallelForDirective &S) {
4637   // Emit SPMD target parallel for region as a standalone region.
4638   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4639     emitTargetParallelForRegion(CGF, S, Action);
4640   };
4641   llvm::Function *Fn;
4642   llvm::Constant *Addr;
4643   // Emit target region as a standalone region.
4644   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4645       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4646   assert(Fn && Addr && "Target device function emission failed.");
4647 }
4648 
4649 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4650     const OMPTargetParallelForDirective &S) {
4651   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4652     emitTargetParallelForRegion(CGF, S, Action);
4653   };
4654   emitCommonOMPTargetDirective(*this, S, CodeGen);
4655 }
4656 
/// Emit the 'parallel for simd' part of a combined
/// 'target parallel for simd' directive.
static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  // Run the pre-action before the region is emitted.
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}
4671 
4672 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
4673     CodeGenModule &CGM, StringRef ParentName,
4674     const OMPTargetParallelForSimdDirective &S) {
4675   // Emit SPMD target parallel for region as a standalone region.
4676   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4677     emitTargetParallelForSimdRegion(CGF, S, Action);
4678   };
4679   llvm::Function *Fn;
4680   llvm::Constant *Addr;
4681   // Emit target region as a standalone region.
4682   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4683       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4684   assert(Fn && Addr && "Target device function emission failed.");
4685 }
4686 
4687 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
4688     const OMPTargetParallelForSimdDirective &S) {
4689   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4690     emitTargetParallelForSimdRegion(CGF, S, Action);
4691   };
4692   emitCommonOMPTargetDirective(*this, S, CodeGen);
4693 }
4694 
4695 /// Emit a helper variable and return corresponding lvalue.
4696 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4697                      const ImplicitParamDecl *PVD,
4698                      CodeGenFunction::OMPPrivateScope &Privates) {
4699   auto *VDecl = cast<VarDecl>(Helper->getDecl());
4700   Privates.addPrivate(
4701       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
4702 }
4703 
/// Common codegen for 'taskloop' and 'taskloop simd' directives: the loop body
/// is outlined as a task and handed to the runtime's taskloop entry point,
/// optionally wrapped in an implicit taskgroup (unless 'nogroup' is present).
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Find the 'if' condition that applies to this construct: either an
  // unmodified clause or one with the 'taskloop' name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop. The Schedule pointer/int pair encodes which
  // clause was seen: IntVal=false means grainsize, IntVal=true means
  // num_tasks (the clauses are mutually exclusive by Sema).
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Generator for the body of the outlined task: emits the loop itself,
  // guarded by the precondition, using the bounds the runtime passes in.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // For 'taskloop simd', mark the loop for vectorization.
    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits. The bound/stride/last-iter values arrive as
    // implicit parameters of the outlined function; LowerBound starts at
    // offset 5 because earlier parameters hold the task context (offset
    // dictated by the runtime's outlined-task signature — see the task
    // codegen in CGOpenMPRuntime; TODO confirm if that signature changes).
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    // Redirect the helper variables to the corresponding implicit params.
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the sequential inner loop over the chunk assigned to this task.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    // IsLastIter is set by the runtime on the task executing the last chunk.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Generator that wraps the outlined function in the runtime taskloop call,
  // emitted as an inlined 'taskloop' directive region.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    // 'nogroup' present: no implicit taskgroup around the taskloop.
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    // Default: the taskloop is enclosed in an implicit taskgroup region.
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getLocStart());
  }
}
4843 
/// '#pragma omp taskloop' — shares the task-based loop codegen with
/// 'taskloop simd'.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4847 
/// '#pragma omp taskloop simd' — same task-based loop codegen as 'taskloop';
/// the simd variant is distinguished inside via isOpenMPSimdDirective().
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
4852 
4853 // Generate the instructions for '#pragma omp target update' directive.
4854 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4855     const OMPTargetUpdateDirective &S) {
4856   // If we don't have target devices, don't bother emitting the data mapping
4857   // code.
4858   if (CGM.getLangOpts().OMPTargetTriples.empty())
4859     return;
4860 
4861   // Check if we have any if clause associated with the directive.
4862   const Expr *IfCond = nullptr;
4863   if (auto *C = S.getSingleClause<OMPIfClause>())
4864     IfCond = C->getCondition();
4865 
4866   // Check if we have any device clause associated with the directive.
4867   const Expr *Device = nullptr;
4868   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4869     Device = C->getDevice();
4870 
4871   OMPLexicalScope Scope(*this, S, OMPD_task);
4872   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4873 }
4874 
4875 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
4876     const OMPExecutableDirective &D) {
4877   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
4878     return;
4879   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
4880     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
4881       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
4882     } else {
4883       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
4884         for (const auto *E : LD->counters()) {
4885           if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
4886                   cast<DeclRefExpr>(E)->getDecl())) {
4887             // Emit only those that were not explicitly referenced in clauses.
4888             if (!CGF.LocalDeclMap.count(VD))
4889               CGF.EmitVarDecl(*VD);
4890           }
4891         }
4892       }
4893       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
4894     }
4895   };
4896   OMPSimdLexicalScope Scope(*this, D);
4897   CGM.getOpenMPRuntime().emitInlinedDirective(
4898       *this,
4899       isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
4900                                                   : D.getDirectiveKind(),
4901       CodeGen);
4902 }
4903