1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false, bool EmitPreInitStmt = true)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     if (EmitPreInitStmt)
61       emitPreInitStmt(CGF, S);
62     if (AsInlined) {
63       if (S.hasAssociatedStmt()) {
64         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65         for (auto &C : CS->captures()) {
66           if (C.capturesVariable() || C.capturesVariableByCopy()) {
67             auto *VD = C.getCapturedVar();
68             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
69                             isCapturedVar(CGF, VD) ||
70                                 (CGF.CapturedStmtInfo &&
71                                  InlinedShareds.isGlobalVarCaptured(VD)),
72                             VD->getType().getNonReferenceType(), VK_LValue,
73                             SourceLocation());
74             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
75               return CGF.EmitLValue(&DRE).getAddress();
76             });
77           }
78         }
79         (void)InlinedShareds.Privatize();
80       }
81     }
82   }
83 };
84 
85 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
86 /// for captured expressions.
87 class OMPParallelScope final : public OMPLexicalScope {
88   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
89     OpenMPDirectiveKind Kind = S.getDirectiveKind();
90     return !(isOpenMPTargetExecutionDirective(Kind) ||
91              isOpenMPLoopBoundSharingDirective(Kind)) &&
92            isOpenMPParallelDirective(Kind);
93   }
94 
95 public:
96   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
97       : OMPLexicalScope(CGF, S,
98                         /*AsInlined=*/false,
99                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
100 };
101 
102 /// Lexical scope for OpenMP teams construct, that handles correct codegen
103 /// for captured expressions.
104 class OMPTeamsScope final : public OMPLexicalScope {
105   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
106     OpenMPDirectiveKind Kind = S.getDirectiveKind();
107     return !isOpenMPTargetExecutionDirective(Kind) &&
108            isOpenMPTeamsDirective(Kind);
109   }
110 
111 public:
112   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
113       : OMPLexicalScope(CGF, S,
114                         /*AsInlined=*/false,
115                         /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
116 };
117 
118 /// Private scope for OpenMP loop-based directives, that supports capturing
119 /// of used expression from loop statement.
120 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
121   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
122     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
123       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
124         for (const auto *I : PreInits->decls())
125           CGF.EmitVarDecl(cast<VarDecl>(*I));
126       }
127     }
128   }
129 
130 public:
131   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
132       : CodeGenFunction::RunCleanupsScope(CGF) {
133     emitPreInitStmt(CGF, S);
134   }
135 };
136 
137 } // namespace
138 
139 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
140   auto &C = getContext();
141   llvm::Value *Size = nullptr;
142   auto SizeInChars = C.getTypeSizeInChars(Ty);
143   if (SizeInChars.isZero()) {
144     // getTypeSizeInChars() returns 0 for a VLA.
145     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
146       llvm::Value *ArraySize;
147       std::tie(ArraySize, Ty) = getVLASize(VAT);
148       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
149     }
150     SizeInChars = C.getTypeSizeInChars(Ty);
151     if (SizeInChars.isZero())
152       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
153     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
154   } else
155     Size = CGM.getSize(SizeInChars);
156   return Size;
157 }
158 
159 void CodeGenFunction::GenerateOpenMPCapturedVars(
160     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
161   const RecordDecl *RD = S.getCapturedRecordDecl();
162   auto CurField = RD->field_begin();
163   auto CurCap = S.captures().begin();
164   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
165                                                  E = S.capture_init_end();
166        I != E; ++I, ++CurField, ++CurCap) {
167     if (CurField->hasCapturedVLAType()) {
168       auto VAT = CurField->getCapturedVLAType();
169       auto *Val = VLASizeMap[VAT->getSizeExpr()];
170       CapturedVars.push_back(Val);
171     } else if (CurCap->capturesThis())
172       CapturedVars.push_back(CXXThisValue);
173     else if (CurCap->capturesVariableByCopy()) {
174       llvm::Value *CV =
175           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
176 
177       // If the field is not a pointer, we need to save the actual value
178       // and load it as a void pointer.
179       if (!CurField->getType()->isAnyPointerType()) {
180         auto &Ctx = getContext();
181         auto DstAddr = CreateMemTemp(
182             Ctx.getUIntPtrType(),
183             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
184         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
185 
186         auto *SrcAddrVal = EmitScalarConversion(
187             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
188             Ctx.getPointerType(CurField->getType()), SourceLocation());
189         LValue SrcLV =
190             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
191 
192         // Store the value using the source type pointer.
193         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
194 
195         // Load the value using the destination type pointer.
196         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
197       }
198       CapturedVars.push_back(CV);
199     } else {
200       assert(CurCap->capturesVariable() && "Expected capture by reference.");
201       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
202     }
203   }
204 }
205 
206 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
207                                     StringRef Name, LValue AddrLV,
208                                     bool isReferenceType = false) {
209   ASTContext &Ctx = CGF.getContext();
210 
211   auto *CastedPtr = CGF.EmitScalarConversion(
212       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
213       Ctx.getPointerType(DstType), SourceLocation());
214   auto TmpAddr =
215       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
216           .getAddress();
217 
218   // If we are dealing with references we need to return the address of the
219   // reference instead of the reference of the value.
220   if (isReferenceType) {
221     QualType RefType = Ctx.getLValueReferenceType(DstType);
222     auto *RefVal = TmpAddr.getPointer();
223     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
224     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
225     CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
226   }
227 
228   return TmpAddr;
229 }
230 
231 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
232   if (T->isLValueReferenceType()) {
233     return C.getLValueReferenceType(
234         getCanonicalParamType(C, T.getNonReferenceType()),
235         /*SpelledAsLValue=*/false);
236   }
237   if (T->isPointerType())
238     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
239   return C.getCanonicalParamType(T);
240 }
241 
242 namespace {
243   /// Contains required data for proper outlined function codegen.
244   struct FunctionOptions {
245     /// Captured statement for which the function is generated.
246     const CapturedStmt *S = nullptr;
247     /// true if cast to/from  UIntPtr is required for variables captured by
248     /// value.
249     bool UIntPtrCastRequired = true;
250     /// true if only casted argumefnts must be registered as local args or VLA
251     /// sizes.
252     bool RegisterCastedArgsOnly = false;
253     /// Name of the generated function.
254     StringRef FunctionName;
255     explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
256                              bool RegisterCastedArgsOnly,
257                              StringRef FunctionName)
258         : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
259           RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
260           FunctionName(FunctionName) {}
261   };
262 }
263 
264 static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
265     CodeGenFunction &CGF, FunctionArgList &Args,
266     llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>>
267         &LocalAddrs,
268     llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
269         &VLASizes,
270     llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
271   const CapturedDecl *CD = FO.S->getCapturedDecl();
272   const RecordDecl *RD = FO.S->getCapturedRecordDecl();
273   assert(CD->hasBody() && "missing CapturedDecl body");
274 
275   CXXThisValue = nullptr;
276   // Build the argument list.
277   CodeGenModule &CGM = CGF.CGM;
278   ASTContext &Ctx = CGM.getContext();
279   bool HasUIntPtrArgs = false;
280   Args.append(CD->param_begin(),
281               std::next(CD->param_begin(), CD->getContextParamPosition()));
282   auto I = FO.S->captures().begin();
283   for (auto *FD : RD->fields()) {
284     QualType ArgType = FD->getType();
285     IdentifierInfo *II = nullptr;
286     VarDecl *CapVar = nullptr;
287 
288     // If this is a capture by copy and the type is not a pointer, the outlined
289     // function argument type should be uintptr and the value properly casted to
290     // uintptr. This is necessary given that the runtime library is only able to
291     // deal with pointers. We can pass in the same way the VLA type sizes to the
292     // outlined function.
293     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
294         I->capturesVariableArrayType()) {
295       HasUIntPtrArgs = true;
296       if (FO.UIntPtrCastRequired)
297         ArgType = Ctx.getUIntPtrType();
298     }
299 
300     if (I->capturesVariable() || I->capturesVariableByCopy()) {
301       CapVar = I->getCapturedVar();
302       II = CapVar->getIdentifier();
303     } else if (I->capturesThis())
304       II = &Ctx.Idents.get("this");
305     else {
306       assert(I->capturesVariableArrayType());
307       II = &Ctx.Idents.get("vla");
308     }
309     if (ArgType->isVariablyModifiedType())
310       ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
311     Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr,
312                                              FD->getLocation(), II, ArgType,
313                                              ImplicitParamDecl::Other));
314     ++I;
315   }
316   Args.append(
317       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
318       CD->param_end());
319 
320   // Create the function declaration.
321   FunctionType::ExtInfo ExtInfo;
322   const CGFunctionInfo &FuncInfo =
323       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
324   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
325 
326   llvm::Function *F =
327       llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
328                              FO.FunctionName, &CGM.getModule());
329   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
330   if (CD->isNothrow())
331     F->addFnAttr(llvm::Attribute::NoUnwind);
332 
333   // Generate the function.
334   CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
335                     CD->getBody()->getLocStart());
336   unsigned Cnt = CD->getContextParamPosition();
337   I = FO.S->captures().begin();
338   for (auto *FD : RD->fields()) {
339     // If we are capturing a pointer by copy we don't need to do anything, just
340     // use the value that we get from the arguments.
341     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
342       const VarDecl *CurVD = I->getCapturedVar();
343       Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
344       // If the variable is a reference we need to materialize it here.
345       if (CurVD->getType()->isReferenceType()) {
346         Address RefAddr = CGF.CreateMemTemp(
347             CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
348         CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
349                               /*Volatile=*/false, CurVD->getType());
350         LocalAddr = RefAddr;
351       }
352       if (!FO.RegisterCastedArgsOnly)
353         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
354       ++Cnt;
355       ++I;
356       continue;
357     }
358 
359     LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
360     LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]),
361                                         Args[Cnt]->getType(), BaseInfo);
362     if (FD->hasCapturedVLAType()) {
363       if (FO.UIntPtrCastRequired) {
364         ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
365                                                           Args[Cnt]->getName(),
366                                                           ArgLVal),
367                                      FD->getType(), BaseInfo);
368       }
369       auto *ExprArg =
370           CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
371       auto VAT = FD->getCapturedVLAType();
372       VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
373     } else if (I->capturesVariable()) {
374       auto *Var = I->getCapturedVar();
375       QualType VarTy = Var->getType();
376       Address ArgAddr = ArgLVal.getAddress();
377       if (!VarTy->isReferenceType()) {
378         if (ArgLVal.getType()->isLValueReferenceType()) {
379           ArgAddr = CGF.EmitLoadOfReference(
380               ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
381         } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
382           assert(ArgLVal.getType()->isPointerType());
383           ArgAddr = CGF.EmitLoadOfPointer(
384               ArgAddr, ArgLVal.getType()->castAs<PointerType>());
385         }
386       }
387       if (!FO.RegisterCastedArgsOnly) {
388         LocalAddrs.insert(
389             {Args[Cnt],
390              {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
391       }
392     } else if (I->capturesVariableByCopy()) {
393       assert(!FD->getType()->isAnyPointerType() &&
394              "Not expecting a captured pointer.");
395       auto *Var = I->getCapturedVar();
396       QualType VarTy = Var->getType();
397       LocalAddrs.insert(
398           {Args[Cnt],
399            {Var,
400             FO.UIntPtrCastRequired
401                 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
402                                        ArgLVal, VarTy->isReferenceType())
403                 : ArgLVal.getAddress()}});
404     } else {
405       // If 'this' is captured, load it into CXXThisValue.
406       assert(I->capturesThis());
407       CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
408                          .getScalarVal();
409       LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
410     }
411     ++Cnt;
412     ++I;
413   }
414 
415   return {F, HasUIntPtrArgs};
416 }
417 
418 llvm::Function *
419 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
420   assert(
421       CapturedStmtInfo &&
422       "CapturedStmtInfo should be set when generating the captured function");
423   const CapturedDecl *CD = S.getCapturedDecl();
424   // Build the argument list.
425   bool NeedWrapperFunction =
426       getDebugInfo() &&
427       CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
428   FunctionArgList Args;
429   llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
430   llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
431   FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
432                      CapturedStmtInfo->getHelperName());
433   llvm::Function *F;
434   bool HasUIntPtrArgs;
435   std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue(
436       *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO);
437   for (const auto &LocalAddrPair : LocalAddrs) {
438     if (LocalAddrPair.second.first) {
439       setAddrOfLocalVar(LocalAddrPair.second.first,
440                         LocalAddrPair.second.second);
441     }
442   }
443   for (const auto &VLASizePair : VLASizes)
444     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
445   PGO.assignRegionCounters(GlobalDecl(CD), F);
446   CapturedStmtInfo->EmitBody(*this, CD->getBody());
447   FinishFunction(CD->getBodyRBrace());
448   if (!NeedWrapperFunction || !HasUIntPtrArgs)
449     return F;
450 
451   FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
452                             /*RegisterCastedArgsOnly=*/true,
453                             ".nondebug_wrapper.");
454   CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
455   WrapperCGF.disableDebugInfo();
456   Args.clear();
457   LocalAddrs.clear();
458   VLASizes.clear();
459   llvm::Function *WrapperF =
460       emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
461                                    WrapperCGF.CXXThisValue, WrapperFO).first;
462   LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
463   llvm::SmallVector<llvm::Value *, 4> CallArgs;
464   for (const auto *Arg : Args) {
465     llvm::Value *CallArg;
466     auto I = LocalAddrs.find(Arg);
467     if (I != LocalAddrs.end()) {
468       LValue LV =
469           WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
470       CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
471     } else {
472       auto EI = VLASizes.find(Arg);
473       if (EI != VLASizes.end())
474         CallArg = EI->second.second;
475       else {
476         LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
477                                               Arg->getType(), BaseInfo);
478         CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
479       }
480     }
481     CallArgs.emplace_back(CallArg);
482   }
483   WrapperCGF.Builder.CreateCall(F, CallArgs);
484   WrapperCGF.FinishFunction();
485   return WrapperF;
486 }
487 
488 //===----------------------------------------------------------------------===//
489 //                              OpenMP Directive Emission
490 //===----------------------------------------------------------------------===//
491 void CodeGenFunction::EmitOMPAggregateAssign(
492     Address DestAddr, Address SrcAddr, QualType OriginalType,
493     const llvm::function_ref<void(Address, Address)> &CopyGen) {
494   // Perform element-by-element initialization.
495   QualType ElementTy;
496 
497   // Drill down to the base element type on both arrays.
498   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
499   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
500   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
501 
502   auto SrcBegin = SrcAddr.getPointer();
503   auto DestBegin = DestAddr.getPointer();
504   // Cast from pointer to array type to pointer to single element.
505   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
506   // The basic structure here is a while-do loop.
507   auto BodyBB = createBasicBlock("omp.arraycpy.body");
508   auto DoneBB = createBasicBlock("omp.arraycpy.done");
509   auto IsEmpty =
510       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
511   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
512 
513   // Enter the loop body, making that address the current address.
514   auto EntryBB = Builder.GetInsertBlock();
515   EmitBlock(BodyBB);
516 
517   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
518 
519   llvm::PHINode *SrcElementPHI =
520     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
521   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
522   Address SrcElementCurrent =
523       Address(SrcElementPHI,
524               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
525 
526   llvm::PHINode *DestElementPHI =
527     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
528   DestElementPHI->addIncoming(DestBegin, EntryBB);
529   Address DestElementCurrent =
530     Address(DestElementPHI,
531             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
532 
533   // Emit copy.
534   CopyGen(DestElementCurrent, SrcElementCurrent);
535 
536   // Shift the address forward by one element.
537   auto DestElementNext = Builder.CreateConstGEP1_32(
538       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
539   auto SrcElementNext = Builder.CreateConstGEP1_32(
540       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
541   // Check whether we've reached the end.
542   auto Done =
543       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
544   Builder.CreateCondBr(Done, DoneBB, BodyBB);
545   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
546   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
547 
548   // Done.
549   EmitBlock(DoneBB, /*IsFinished=*/true);
550 }
551 
552 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
553                                   Address SrcAddr, const VarDecl *DestVD,
554                                   const VarDecl *SrcVD, const Expr *Copy) {
555   if (OriginalType->isArrayType()) {
556     auto *BO = dyn_cast<BinaryOperator>(Copy);
557     if (BO && BO->getOpcode() == BO_Assign) {
558       // Perform simple memcpy for simple copying.
559       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
560     } else {
561       // For arrays with complex element types perform element by element
562       // copying.
563       EmitOMPAggregateAssign(
564           DestAddr, SrcAddr, OriginalType,
565           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
566             // Working with the single array element, so have to remap
567             // destination and source variables to corresponding array
568             // elements.
569             CodeGenFunction::OMPPrivateScope Remap(*this);
570             Remap.addPrivate(DestVD, [DestElement]() -> Address {
571               return DestElement;
572             });
573             Remap.addPrivate(
574                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
575             (void)Remap.Privatize();
576             EmitIgnoredExpr(Copy);
577           });
578     }
579   } else {
580     // Remap pseudo source variable to private copy.
581     CodeGenFunction::OMPPrivateScope Remap(*this);
582     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
583     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
584     (void)Remap.Privatize();
585     // Emit copying of the whole variable.
586     EmitIgnoredExpr(Copy);
587   }
588 }
589 
590 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
591                                                 OMPPrivateScope &PrivateScope) {
592   if (!HaveInsertPoint())
593     return false;
594   bool FirstprivateIsLastprivate = false;
595   llvm::DenseSet<const VarDecl *> Lastprivates;
596   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
597     for (const auto *D : C->varlists())
598       Lastprivates.insert(
599           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
600   }
601   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
602   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
603   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
604     auto IRef = C->varlist_begin();
605     auto InitsRef = C->inits().begin();
606     for (auto IInit : C->private_copies()) {
607       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
608       bool ThisFirstprivateIsLastprivate =
609           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
610       auto *CapFD = CapturesInfo.lookup(OrigVD);
611       auto *FD = CapturedStmtInfo->lookup(OrigVD);
612       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
613           !FD->getType()->isReferenceType()) {
614         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
615         ++IRef;
616         ++InitsRef;
617         continue;
618       }
619       FirstprivateIsLastprivate =
620           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
621       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
622         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
623         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
624         bool IsRegistered;
625         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
626                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
627                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
628         Address OriginalAddr = EmitLValue(&DRE).getAddress();
629         QualType Type = VD->getType();
630         if (Type->isArrayType()) {
631           // Emit VarDecl with copy init for arrays.
632           // Get the address of the original variable captured in current
633           // captured region.
634           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
635             auto Emission = EmitAutoVarAlloca(*VD);
636             auto *Init = VD->getInit();
637             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
638               // Perform simple memcpy.
639               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
640                                   Type);
641             } else {
642               EmitOMPAggregateAssign(
643                   Emission.getAllocatedAddress(), OriginalAddr, Type,
644                   [this, VDInit, Init](Address DestElement,
645                                        Address SrcElement) {
646                     // Clean up any temporaries needed by the initialization.
647                     RunCleanupsScope InitScope(*this);
648                     // Emit initialization for single element.
649                     setAddrOfLocalVar(VDInit, SrcElement);
650                     EmitAnyExprToMem(Init, DestElement,
651                                      Init->getType().getQualifiers(),
652                                      /*IsInitializer*/ false);
653                     LocalDeclMap.erase(VDInit);
654                   });
655             }
656             EmitAutoVarCleanups(Emission);
657             return Emission.getAllocatedAddress();
658           });
659         } else {
660           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
661             // Emit private VarDecl with copy init.
662             // Remap temp VDInit variable to the address of the original
663             // variable
664             // (for proper handling of captured global variables).
665             setAddrOfLocalVar(VDInit, OriginalAddr);
666             EmitDecl(*VD);
667             LocalDeclMap.erase(VDInit);
668             return GetAddrOfLocalVar(VD);
669           });
670         }
671         assert(IsRegistered &&
672                "firstprivate var already registered as private");
673         // Silence the warning about unused variable.
674         (void)IsRegistered;
675       }
676       ++IRef;
677       ++InitsRef;
678     }
679   }
680   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
681 }
682 
683 void CodeGenFunction::EmitOMPPrivateClause(
684     const OMPExecutableDirective &D,
685     CodeGenFunction::OMPPrivateScope &PrivateScope) {
686   if (!HaveInsertPoint())
687     return;
688   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
689   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
690     auto IRef = C->varlist_begin();
691     for (auto IInit : C->private_copies()) {
692       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
693       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
694         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
695         bool IsRegistered =
696             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
697               // Emit private VarDecl with copy init.
698               EmitDecl(*VD);
699               return GetAddrOfLocalVar(VD);
700             });
701         assert(IsRegistered && "private var already registered as private");
702         // Silence the warning about unused variable.
703         (void)IsRegistered;
704       }
705       ++IRef;
706     }
707   }
708 }
709 
710 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
711   if (!HaveInsertPoint())
712     return false;
713   // threadprivate_var1 = master_threadprivate_var1;
714   // operator=(threadprivate_var2, master_threadprivate_var2);
715   // ...
716   // __kmpc_barrier(&loc, global_tid);
717   llvm::DenseSet<const VarDecl *> CopiedVars;
718   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
719   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
720     auto IRef = C->varlist_begin();
721     auto ISrcRef = C->source_exprs().begin();
722     auto IDestRef = C->destination_exprs().begin();
723     for (auto *AssignOp : C->assignment_ops()) {
724       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
725       QualType Type = VD->getType();
726       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
727         // Get the address of the master variable. If we are emitting code with
728         // TLS support, the address is passed from the master as field in the
729         // captured declaration.
730         Address MasterAddr = Address::invalid();
731         if (getLangOpts().OpenMPUseTLS &&
732             getContext().getTargetInfo().isTLSSupported()) {
733           assert(CapturedStmtInfo->lookup(VD) &&
734                  "Copyin threadprivates should have been captured!");
735           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
736                           VK_LValue, (*IRef)->getExprLoc());
737           MasterAddr = EmitLValue(&DRE).getAddress();
738           LocalDeclMap.erase(VD);
739         } else {
740           MasterAddr =
741             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
742                                         : CGM.GetAddrOfGlobal(VD),
743                     getContext().getDeclAlign(VD));
744         }
745         // Get the address of the threadprivate variable.
746         Address PrivateAddr = EmitLValue(*IRef).getAddress();
747         if (CopiedVars.size() == 1) {
748           // At first check if current thread is a master thread. If it is, no
749           // need to copy data.
750           CopyBegin = createBasicBlock("copyin.not.master");
751           CopyEnd = createBasicBlock("copyin.not.master.end");
752           Builder.CreateCondBr(
753               Builder.CreateICmpNE(
754                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
755                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
756               CopyBegin, CopyEnd);
757           EmitBlock(CopyBegin);
758         }
759         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
760         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
761         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
762       }
763       ++IRef;
764       ++ISrcRef;
765       ++IDestRef;
766     }
767   }
768   if (CopyEnd) {
769     // Exit out of copying procedure for non-master thread.
770     EmitBlock(CopyEnd, /*IsFinished=*/true);
771     return true;
772   }
773   return false;
774 }
775 
776 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
777     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
778   if (!HaveInsertPoint())
779     return false;
780   bool HasAtLeastOneLastprivate = false;
781   llvm::DenseSet<const VarDecl *> SIMDLCVs;
782   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
783     auto *LoopDirective = cast<OMPLoopDirective>(&D);
784     for (auto *C : LoopDirective->counters()) {
785       SIMDLCVs.insert(
786           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
787     }
788   }
789   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
790   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
791     HasAtLeastOneLastprivate = true;
792     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
793       break;
794     auto IRef = C->varlist_begin();
795     auto IDestRef = C->destination_exprs().begin();
796     for (auto *IInit : C->private_copies()) {
797       // Keep the address of the original variable for future update at the end
798       // of the loop.
799       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
800       // Taskloops do not require additional initialization, it is done in
801       // runtime support library.
802       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
803         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
804         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
805           DeclRefExpr DRE(
806               const_cast<VarDecl *>(OrigVD),
807               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
808                   OrigVD) != nullptr,
809               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
810           return EmitLValue(&DRE).getAddress();
811         });
812         // Check if the variable is also a firstprivate: in this case IInit is
813         // not generated. Initialization of this variable will happen in codegen
814         // for 'firstprivate' clause.
815         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
816           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
817           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
818             // Emit private VarDecl with copy init.
819             EmitDecl(*VD);
820             return GetAddrOfLocalVar(VD);
821           });
822           assert(IsRegistered &&
823                  "lastprivate var already registered as private");
824           (void)IsRegistered;
825         }
826       }
827       ++IRef;
828       ++IDestRef;
829     }
830   }
831   return HasAtLeastOneLastprivate;
832 }
833 
834 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
835     const OMPExecutableDirective &D, bool NoFinals,
836     llvm::Value *IsLastIterCond) {
837   if (!HaveInsertPoint())
838     return;
839   // Emit following code:
840   // if (<IsLastIterCond>) {
841   //   orig_var1 = private_orig_var1;
842   //   ...
843   //   orig_varn = private_orig_varn;
844   // }
845   llvm::BasicBlock *ThenBB = nullptr;
846   llvm::BasicBlock *DoneBB = nullptr;
847   if (IsLastIterCond) {
848     ThenBB = createBasicBlock(".omp.lastprivate.then");
849     DoneBB = createBasicBlock(".omp.lastprivate.done");
850     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
851     EmitBlock(ThenBB);
852   }
853   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
854   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
855   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
856     auto IC = LoopDirective->counters().begin();
857     for (auto F : LoopDirective->finals()) {
858       auto *D =
859           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
860       if (NoFinals)
861         AlreadyEmittedVars.insert(D);
862       else
863         LoopCountersAndUpdates[D] = F;
864       ++IC;
865     }
866   }
867   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
868     auto IRef = C->varlist_begin();
869     auto ISrcRef = C->source_exprs().begin();
870     auto IDestRef = C->destination_exprs().begin();
871     for (auto *AssignOp : C->assignment_ops()) {
872       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
873       QualType Type = PrivateVD->getType();
874       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
875       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
876         // If lastprivate variable is a loop control variable for loop-based
877         // directive, update its value before copyin back to original
878         // variable.
879         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
880           EmitIgnoredExpr(FinalExpr);
881         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
882         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
883         // Get the address of the original variable.
884         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
885         // Get the address of the private variable.
886         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
887         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
888           PrivateAddr =
889               Address(Builder.CreateLoad(PrivateAddr),
890                       getNaturalTypeAlignment(RefTy->getPointeeType()));
891         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
892       }
893       ++IRef;
894       ++ISrcRef;
895       ++IDestRef;
896     }
897     if (auto *PostUpdate = C->getPostUpdateExpr())
898       EmitIgnoredExpr(PostUpdate);
899   }
900   if (IsLastIterCond)
901     EmitBlock(DoneBB, /*IsFinished=*/true);
902 }
903 
904 void CodeGenFunction::EmitOMPReductionClauseInit(
905     const OMPExecutableDirective &D,
906     CodeGenFunction::OMPPrivateScope &PrivateScope) {
907   if (!HaveInsertPoint())
908     return;
909   SmallVector<const Expr *, 4> Shareds;
910   SmallVector<const Expr *, 4> Privates;
911   SmallVector<const Expr *, 4> ReductionOps;
912   SmallVector<const Expr *, 4> LHSs;
913   SmallVector<const Expr *, 4> RHSs;
914   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
915     auto IPriv = C->privates().begin();
916     auto IRed = C->reduction_ops().begin();
917     auto ILHS = C->lhs_exprs().begin();
918     auto IRHS = C->rhs_exprs().begin();
919     for (const auto *Ref : C->varlists()) {
920       Shareds.emplace_back(Ref);
921       Privates.emplace_back(*IPriv);
922       ReductionOps.emplace_back(*IRed);
923       LHSs.emplace_back(*ILHS);
924       RHSs.emplace_back(*IRHS);
925       std::advance(IPriv, 1);
926       std::advance(IRed, 1);
927       std::advance(ILHS, 1);
928       std::advance(IRHS, 1);
929     }
930   }
931   ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
932   unsigned Count = 0;
933   auto ILHS = LHSs.begin();
934   auto IRHS = RHSs.begin();
935   auto IPriv = Privates.begin();
936   for (const auto *IRef : Shareds) {
937     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
938     // Emit private VarDecl with reduction init.
939     RedCG.emitSharedLValue(*this, Count);
940     RedCG.emitAggregateType(*this, Count);
941     auto Emission = EmitAutoVarAlloca(*PrivateVD);
942     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
943                              RedCG.getSharedLValue(Count),
944                              [&Emission](CodeGenFunction &CGF) {
945                                CGF.EmitAutoVarInit(Emission);
946                                return true;
947                              });
948     EmitAutoVarCleanups(Emission);
949     Address BaseAddr = RedCG.adjustPrivateAddress(
950         *this, Count, Emission.getAllocatedAddress());
951     bool IsRegistered = PrivateScope.addPrivate(
952         RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
953     assert(IsRegistered && "private var already registered as private");
954     // Silence the warning about unused variable.
955     (void)IsRegistered;
956 
957     auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
958     auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
959     if (isa<OMPArraySectionExpr>(IRef)) {
960       // Store the address of the original variable associated with the LHS
961       // implicit variable.
962       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
963         return RedCG.getSharedLValue(Count).getAddress();
964       });
965       PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
966         return GetAddrOfLocalVar(PrivateVD);
967       });
968     } else if (isa<ArraySubscriptExpr>(IRef)) {
969       // Store the address of the original variable associated with the LHS
970       // implicit variable.
971       PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
972         return RedCG.getSharedLValue(Count).getAddress();
973       });
974       PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
975         return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
976                                             ConvertTypeForMem(RHSVD->getType()),
977                                             "rhs.begin");
978       });
979     } else {
980       QualType Type = PrivateVD->getType();
981       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
982       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
983       // Store the address of the original variable associated with the LHS
984       // implicit variable.
985       if (IsArray) {
986         OriginalAddr = Builder.CreateElementBitCast(
987             OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
988       }
989       PrivateScope.addPrivate(
990           LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
991       PrivateScope.addPrivate(
992           RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
993             return IsArray
994                        ? Builder.CreateElementBitCast(
995                              GetAddrOfLocalVar(PrivateVD),
996                              ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
997                        : GetAddrOfLocalVar(PrivateVD);
998           });
999     }
1000     ++ILHS;
1001     ++IRHS;
1002     ++IPriv;
1003     ++Count;
1004   }
1005 }
1006 
1007 void CodeGenFunction::EmitOMPReductionClauseFinal(
1008     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1009   if (!HaveInsertPoint())
1010     return;
1011   llvm::SmallVector<const Expr *, 8> Privates;
1012   llvm::SmallVector<const Expr *, 8> LHSExprs;
1013   llvm::SmallVector<const Expr *, 8> RHSExprs;
1014   llvm::SmallVector<const Expr *, 8> ReductionOps;
1015   bool HasAtLeastOneReduction = false;
1016   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1017     HasAtLeastOneReduction = true;
1018     Privates.append(C->privates().begin(), C->privates().end());
1019     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1020     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1021     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1022   }
1023   if (HasAtLeastOneReduction) {
1024     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1025                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1026                       D.getDirectiveKind() == OMPD_simd;
1027     bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1028     // Emit nowait reduction if nowait clause is present or directive is a
1029     // parallel directive (it always has implicit barrier).
1030     CGM.getOpenMPRuntime().emitReduction(
1031         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1032         {WithNowait, SimpleReduction, ReductionKind});
1033   }
1034 }
1035 
1036 static void emitPostUpdateForReductionClause(
1037     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1038     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1039   if (!CGF.HaveInsertPoint())
1040     return;
1041   llvm::BasicBlock *DoneBB = nullptr;
1042   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1043     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1044       if (!DoneBB) {
1045         if (auto *Cond = CondGen(CGF)) {
1046           // If the first post-update expression is found, emit conditional
1047           // block if it was requested.
1048           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1049           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1050           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1051           CGF.EmitBlock(ThenBB);
1052         }
1053       }
1054       CGF.EmitIgnoredExpr(PostUpdate);
1055     }
1056   }
1057   if (DoneBB)
1058     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1059 }
1060 
1061 namespace {
1062 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1063 /// parallel function. This is necessary for combined constructs such as
1064 /// 'distribute parallel for'
1065 typedef llvm::function_ref<void(CodeGenFunction &,
1066                                 const OMPExecutableDirective &,
1067                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1068     CodeGenBoundParametersTy;
1069 } // anonymous namespace
1070 
1071 static void emitCommonOMPParallelDirective(
1072     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1073     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1074     const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1075   const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1076   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1077       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1078   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1079     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1080     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1081                                          /*IgnoreResultAssign*/ true);
1082     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1083         CGF, NumThreads, NumThreadsClause->getLocStart());
1084   }
1085   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1086     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1087     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1088         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1089   }
1090   const Expr *IfCond = nullptr;
1091   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1092     if (C->getNameModifier() == OMPD_unknown ||
1093         C->getNameModifier() == OMPD_parallel) {
1094       IfCond = C->getCondition();
1095       break;
1096     }
1097   }
1098 
1099   OMPParallelScope Scope(CGF, S);
1100   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1101   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1102   // lower and upper bounds with the pragma 'for' chunking mechanism.
1103   // The following lambda takes care of appending the lower and upper bound
1104   // parameters when necessary
1105   CodeGenBoundParameters(CGF, S, CapturedVars);
1106   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1107   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1108                                               CapturedVars, IfCond);
1109 }
1110 
1111 static void emitEmptyBoundParameters(CodeGenFunction &,
1112                                      const OMPExecutableDirective &,
1113                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
1114 
1115 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1116   // Emit parallel region as a standalone region.
1117   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1118     OMPPrivateScope PrivateScope(CGF);
1119     bool Copyins = CGF.EmitOMPCopyinClause(S);
1120     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1121     if (Copyins) {
1122       // Emit implicit barrier to synchronize threads and avoid data races on
1123       // propagation master's thread values of threadprivate variables to local
1124       // instances of that variables of all other implicit threads.
1125       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1126           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1127           /*ForceSimpleCall=*/true);
1128     }
1129     CGF.EmitOMPPrivateClause(S, PrivateScope);
1130     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1131     (void)PrivateScope.Privatize();
1132     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1133     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1134   };
1135   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1136                                  emitEmptyBoundParameters);
1137   emitPostUpdateForReductionClause(
1138       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1139 }
1140 
1141 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1142                                       JumpDest LoopExit) {
1143   RunCleanupsScope BodyScope(*this);
1144   // Update counters values on current iteration.
1145   for (auto I : D.updates()) {
1146     EmitIgnoredExpr(I);
1147   }
1148   // Update the linear variables.
1149   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1150     for (auto *U : C->updates())
1151       EmitIgnoredExpr(U);
1152   }
1153 
1154   // On a continue in the body, jump to the end.
1155   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1156   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1157   // Emit loop body.
1158   EmitStmt(D.getBody());
1159   // The end (updates/cleanups).
1160   EmitBlock(Continue.getBlock());
1161   BreakContinueStack.pop_back();
1162 }
1163 
1164 void CodeGenFunction::EmitOMPInnerLoop(
1165     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1166     const Expr *IncExpr,
1167     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1168     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1169   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1170 
1171   // Start the loop with a block that tests the condition.
1172   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1173   EmitBlock(CondBlock);
1174   const SourceRange &R = S.getSourceRange();
1175   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1176                  SourceLocToDebugLoc(R.getEnd()));
1177 
1178   // If there are any cleanups between here and the loop-exit scope,
1179   // create a block to stage a loop exit along.
1180   auto ExitBlock = LoopExit.getBlock();
1181   if (RequiresCleanup)
1182     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1183 
1184   auto LoopBody = createBasicBlock("omp.inner.for.body");
1185 
1186   // Emit condition.
1187   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1188   if (ExitBlock != LoopExit.getBlock()) {
1189     EmitBlock(ExitBlock);
1190     EmitBranchThroughCleanup(LoopExit);
1191   }
1192 
1193   EmitBlock(LoopBody);
1194   incrementProfileCounter(&S);
1195 
1196   // Create a block for the increment.
1197   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1198   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1199 
1200   BodyGen(*this);
1201 
1202   // Emit "IV = IV + 1" and a back-edge to the condition block.
1203   EmitBlock(Continue.getBlock());
1204   EmitIgnoredExpr(IncExpr);
1205   PostIncGen(*this);
1206   BreakContinueStack.pop_back();
1207   EmitBranch(CondBlock);
1208   LoopStack.pop();
1209   // Emit the fall-through block.
1210   EmitBlock(LoopExit.getBlock());
1211 }
1212 
1213 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1214   if (!HaveInsertPoint())
1215     return false;
1216   // Emit inits for the linear variables.
1217   bool HasLinears = false;
1218   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1219     for (auto *Init : C->inits()) {
1220       HasLinears = true;
1221       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1222       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1223         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1224         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1225         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1226                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1227                         VD->getInit()->getType(), VK_LValue,
1228                         VD->getInit()->getExprLoc());
1229         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1230                                                 VD->getType()),
1231                        /*capturedByInit=*/false);
1232         EmitAutoVarCleanups(Emission);
1233       } else
1234         EmitVarDecl(*VD);
1235     }
1236     // Emit the linear steps for the linear clauses.
1237     // If a step is not constant, it is pre-calculated before the loop.
1238     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1239       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1240         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1241         // Emit calculation of the linear step.
1242         EmitIgnoredExpr(CS);
1243       }
1244   }
1245   return HasLinears;
1246 }
1247 
1248 void CodeGenFunction::EmitOMPLinearClauseFinal(
1249     const OMPLoopDirective &D,
1250     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1251   if (!HaveInsertPoint())
1252     return;
1253   llvm::BasicBlock *DoneBB = nullptr;
1254   // Emit the final values of the linear variables.
1255   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1256     auto IC = C->varlist_begin();
1257     for (auto *F : C->finals()) {
1258       if (!DoneBB) {
1259         if (auto *Cond = CondGen(*this)) {
1260           // If the first post-update expression is found, emit conditional
1261           // block if it was requested.
1262           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1263           DoneBB = createBasicBlock(".omp.linear.pu.done");
1264           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1265           EmitBlock(ThenBB);
1266         }
1267       }
1268       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1269       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1270                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1271                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1272       Address OrigAddr = EmitLValue(&DRE).getAddress();
1273       CodeGenFunction::OMPPrivateScope VarScope(*this);
1274       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1275       (void)VarScope.Privatize();
1276       EmitIgnoredExpr(F);
1277       ++IC;
1278     }
1279     if (auto *PostUpdate = C->getPostUpdateExpr())
1280       EmitIgnoredExpr(PostUpdate);
1281   }
1282   if (DoneBB)
1283     EmitBlock(DoneBB, /*IsFinished=*/true);
1284 }
1285 
1286 static void emitAlignedClause(CodeGenFunction &CGF,
1287                               const OMPExecutableDirective &D) {
1288   if (!CGF.HaveInsertPoint())
1289     return;
1290   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1291     unsigned ClauseAlignment = 0;
1292     if (auto AlignmentExpr = Clause->getAlignment()) {
1293       auto AlignmentCI =
1294           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1295       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1296     }
1297     for (auto E : Clause->varlists()) {
1298       unsigned Alignment = ClauseAlignment;
1299       if (Alignment == 0) {
1300         // OpenMP [2.8.1, Description]
1301         // If no optional parameter is specified, implementation-defined default
1302         // alignments for SIMD instructions on the target platforms are assumed.
1303         Alignment =
1304             CGF.getContext()
1305                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1306                     E->getType()->getPointeeType()))
1307                 .getQuantity();
1308       }
1309       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1310              "alignment is not power of 2");
1311       if (Alignment != 0) {
1312         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1313         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1314       }
1315     }
1316   }
1317 }
1318 
1319 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1320     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1321   if (!HaveInsertPoint())
1322     return;
1323   auto I = S.private_counters().begin();
1324   for (auto *E : S.counters()) {
1325     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1326     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1327     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1328       // Emit var without initialization.
1329       if (!LocalDeclMap.count(PrivateVD)) {
1330         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1331         EmitAutoVarCleanups(VarEmission);
1332       }
1333       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1334                       /*RefersToEnclosingVariableOrCapture=*/false,
1335                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1336       return EmitLValue(&DRE).getAddress();
1337     });
1338     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1339         VD->hasGlobalStorage()) {
1340       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1341         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1342                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1343                         E->getType(), VK_LValue, E->getExprLoc());
1344         return EmitLValue(&DRE).getAddress();
1345       });
1346     }
1347     ++I;
1348   }
1349 }
1350 
1351 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1352                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1353                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1354   if (!CGF.HaveInsertPoint())
1355     return;
1356   {
1357     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1358     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1359     (void)PreCondScope.Privatize();
1360     // Get initial values of real counters.
1361     for (auto I : S.inits()) {
1362       CGF.EmitIgnoredExpr(I);
1363     }
1364   }
1365   // Check that loop is executed at least one time.
1366   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1367 }
1368 
1369 void CodeGenFunction::EmitOMPLinearClause(
1370     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1371   if (!HaveInsertPoint())
1372     return;
1373   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1374   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1375     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1376     for (auto *C : LoopDirective->counters()) {
1377       SIMDLCVs.insert(
1378           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1379     }
1380   }
1381   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1382     auto CurPrivate = C->privates().begin();
1383     for (auto *E : C->varlists()) {
1384       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1385       auto *PrivateVD =
1386           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1387       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1388         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1389           // Emit private VarDecl with copy init.
1390           EmitVarDecl(*PrivateVD);
1391           return GetAddrOfLocalVar(PrivateVD);
1392         });
1393         assert(IsRegistered && "linear var already registered as private");
1394         // Silence the warning about unused variable.
1395         (void)IsRegistered;
1396       } else
1397         EmitVarDecl(*PrivateVD);
1398       ++CurPrivate;
1399     }
1400   }
1401 }
1402 
1403 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1404                                      const OMPExecutableDirective &D,
1405                                      bool IsMonotonic) {
1406   if (!CGF.HaveInsertPoint())
1407     return;
1408   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1409     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1410                                  /*ignoreResult=*/true);
1411     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1412     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1413     // In presence of finite 'safelen', it may be unsafe to mark all
1414     // the memory instructions parallel, because loop-carried
1415     // dependences of 'safelen' iterations are possible.
1416     if (!IsMonotonic)
1417       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1418   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1419     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1420                                  /*ignoreResult=*/true);
1421     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1422     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1423     // In presence of finite 'safelen', it may be unsafe to mark all
1424     // the memory instructions parallel, because loop-carried
1425     // dependences of 'safelen' iterations are possible.
1426     CGF.LoopStack.setParallel(false);
1427   }
1428 }
1429 
1430 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1431                                       bool IsMonotonic) {
1432   // Walk clauses and process safelen/lastprivate.
1433   LoopStack.setParallel(!IsMonotonic);
1434   LoopStack.setVectorizeEnable(true);
1435   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1436 }
1437 
1438 void CodeGenFunction::EmitOMPSimdFinal(
1439     const OMPLoopDirective &D,
1440     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1441   if (!HaveInsertPoint())
1442     return;
1443   llvm::BasicBlock *DoneBB = nullptr;
1444   auto IC = D.counters().begin();
1445   auto IPC = D.private_counters().begin();
1446   for (auto F : D.finals()) {
1447     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1448     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1449     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1450     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1451         OrigVD->hasGlobalStorage() || CED) {
1452       if (!DoneBB) {
1453         if (auto *Cond = CondGen(*this)) {
1454           // If the first post-update expression is found, emit conditional
1455           // block if it was requested.
1456           auto *ThenBB = createBasicBlock(".omp.final.then");
1457           DoneBB = createBasicBlock(".omp.final.done");
1458           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1459           EmitBlock(ThenBB);
1460         }
1461       }
1462       Address OrigAddr = Address::invalid();
1463       if (CED)
1464         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1465       else {
1466         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1467                         /*RefersToEnclosingVariableOrCapture=*/false,
1468                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1469         OrigAddr = EmitLValue(&DRE).getAddress();
1470       }
1471       OMPPrivateScope VarScope(*this);
1472       VarScope.addPrivate(OrigVD,
1473                           [OrigAddr]() -> Address { return OrigAddr; });
1474       (void)VarScope.Privatize();
1475       EmitIgnoredExpr(F);
1476     }
1477     ++IC;
1478     ++IPC;
1479   }
1480   if (DoneBB)
1481     EmitBlock(DoneBB, /*IsFinished=*/true);
1482 }
1483 
1484 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1485                                          const OMPLoopDirective &S,
1486                                          CodeGenFunction::JumpDest LoopExit) {
1487   CGF.EmitOMPLoopBody(S, LoopExit);
1488   CGF.EmitStopPoint(&S);
1489 }
1490 
/// Emit code for the simd directive \p S as an inlined OMPD_simd region.
/// The region emits the precondition check, the iteration variable and the
/// iterations count, sets up the simd loop attributes, privatizes the clause
/// variables, emits the inner loop, and finally emits the final updates for
/// counters, lastprivate, reduction and linear variables.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant, in which case no
    // branch is emitted.
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      // Precondition is known to be false: nothing to emit for this directive.
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Set up the loop attributes (parallel mark, vectorization, width).
    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    // The returned value is deliberately ignored here.
    (void)CGF.EmitOMPLinearClauseInit(S);
    {
      // Private copies registered below live for the duration of this scope.
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      // The first callback emits the loop body followed by a stop point; the
      // second callback intentionally does nothing.
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // A condition generator returning null requests no guarding branch
      // around the final updates.
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    // Emitted after LoopScope has been closed.
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
1572 
/// Emit the outer ("dispatch") loop that repeatedly obtains a chunk of
/// iterations and runs the inner loop over it.
///
/// \param DynamicOrOrdered If true, the loop condition comes from
/// emitForNext() of the OpenMP runtime and IV is initialized inside the loop
/// body. If false, the bounds are recomputed from \p LoopArgs (EUB, Init, Cond)
/// on every outer iteration, advanced with NextLB/NextUB, and a static-finish
/// call is emitted once the loop is done.
/// \param IsMonotonic Used to decide whether the loop is marked parallel.
/// \param S The loop directive being emitted.
/// \param LoopScope Private scope of the loop; consulted for pending cleanups.
/// \param LoopArgs Expressions and addresses describing the loop bounds.
/// \param CodeGenLoop Callback that emits the content of the inner loop.
/// \param CodeGenOrdered Callback passed on as the second generator of the
/// inner loop.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();

  // Bit width and signedness of the iteration variable, passed to the runtime
  // call and to the ordered callback.
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // The runtime call result is used as the loop condition.
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  // A staging block was created above: leave the loop through the cleanups.
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  // For simd directives the full simd loop setup is applied instead.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();

  // when 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // when 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for')
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  // Back edge to the condition block of the outer loop.
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // The static-finish call is emitted only when the loop is neither dynamic
  // nor ordered.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
1676 
1677 void CodeGenFunction::EmitOMPForOuterLoop(
1678     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1679     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1680     const OMPLoopArguments &LoopArgs,
1681     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1682   auto &RT = CGM.getOpenMPRuntime();
1683 
1684   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1685   const bool DynamicOrOrdered =
1686       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1687 
1688   assert((Ordered ||
1689           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1690                                  LoopArgs.Chunk != nullptr)) &&
1691          "static non-chunked schedule does not need outer loop");
1692 
1693   // Emit outer loop.
1694   //
1695   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1696   // When schedule(dynamic,chunk_size) is specified, the iterations are
1697   // distributed to threads in the team in chunks as the threads request them.
1698   // Each thread executes a chunk of iterations, then requests another chunk,
1699   // until no chunks remain to be distributed. Each chunk contains chunk_size
1700   // iterations, except for the last chunk to be distributed, which may have
1701   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1702   //
1703   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1704   // to threads in the team in chunks as the executing threads request them.
1705   // Each thread executes a chunk of iterations, then requests another chunk,
1706   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1707   // each chunk is proportional to the number of unassigned iterations divided
1708   // by the number of threads in the team, decreasing to 1. For a chunk_size
1709   // with value k (greater than 1), the size of each chunk is determined in the
1710   // same way, with the restriction that the chunks do not contain fewer than k
1711   // iterations (except for the last chunk to be assigned, which may have fewer
1712   // than k iterations).
1713   //
1714   // When schedule(auto) is specified, the decision regarding scheduling is
1715   // delegated to the compiler and/or runtime system. The programmer gives the
1716   // implementation the freedom to choose any possible mapping of iterations to
1717   // threads in the team.
1718   //
1719   // When schedule(runtime) is specified, the decision regarding scheduling is
1720   // deferred until run time, and the schedule and chunk size are taken from the
1721   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1722   // implementation defined
1723   //
1724   // while(__kmpc_dispatch_next(&LB, &UB)) {
1725   //   idx = LB;
1726   //   while (idx <= UB) { BODY; ++idx;
1727   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1728   //   } // inner loop
1729   // }
1730   //
1731   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1732   // When schedule(static, chunk_size) is specified, iterations are divided into
1733   // chunks of size chunk_size, and the chunks are assigned to the threads in
1734   // the team in a round-robin fashion in the order of the thread number.
1735   //
1736   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1737   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1738   //   LB = LB + ST;
1739   //   UB = UB + ST;
1740   // }
1741   //
1742 
1743   const Expr *IVExpr = S.getIterationVariable();
1744   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1745   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1746 
1747   if (DynamicOrOrdered) {
1748     auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
1749     llvm::Value *LBVal = DispatchBounds.first;
1750     llvm::Value *UBVal = DispatchBounds.second;
1751     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
1752                                                              LoopArgs.Chunk};
1753     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1754                            IVSigned, Ordered, DipatchRTInputValues);
1755   } else {
1756     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1757                          Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
1758                          LoopArgs.ST, LoopArgs.Chunk);
1759   }
1760 
1761   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
1762                                     const unsigned IVSize,
1763                                     const bool IVSigned) {
1764     if (Ordered) {
1765       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
1766                                                             IVSigned);
1767     }
1768   };
1769 
1770   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1771                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
1772   OuterLoopArgs.IncExpr = S.getInc();
1773   OuterLoopArgs.Init = S.getInit();
1774   OuterLoopArgs.Cond = S.getCond();
1775   OuterLoopArgs.NextLB = S.getNextLowerBound();
1776   OuterLoopArgs.NextUB = S.getNextUpperBound();
1777   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
1778                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
1779 }
1780 
1781 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
1782                              const unsigned IVSize, const bool IVSigned) {}
1783 
1784 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1785     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
1786     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
1787     const CodeGenLoopTy &CodeGenLoopContent) {
1788 
1789   auto &RT = CGM.getOpenMPRuntime();
1790 
1791   // Emit outer loop.
1792   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1793   // dynamic
1794   //
1795 
1796   const Expr *IVExpr = S.getIterationVariable();
1797   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1798   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1799 
1800   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1801                               IVSigned, /* Ordered = */ false, LoopArgs.IL,
1802                               LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
1803                               LoopArgs.Chunk);
1804 
1805   // for combined 'distribute' and 'for' the increment expression of distribute
1806   // is store in DistInc. For 'distribute' alone, it is in Inc.
1807   Expr *IncExpr;
1808   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
1809     IncExpr = S.getDistInc();
1810   else
1811     IncExpr = S.getInc();
1812 
1813   // this routine is shared by 'omp distribute parallel for' and
1814   // 'omp distribute': select the right EUB expression depending on the
1815   // directive
1816   OMPLoopArguments OuterLoopArgs;
1817   OuterLoopArgs.LB = LoopArgs.LB;
1818   OuterLoopArgs.UB = LoopArgs.UB;
1819   OuterLoopArgs.ST = LoopArgs.ST;
1820   OuterLoopArgs.IL = LoopArgs.IL;
1821   OuterLoopArgs.Chunk = LoopArgs.Chunk;
1822   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1823                           ? S.getCombinedEnsureUpperBound()
1824                           : S.getEnsureUpperBound();
1825   OuterLoopArgs.IncExpr = IncExpr;
1826   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1827                            ? S.getCombinedInit()
1828                            : S.getInit();
1829   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1830                            ? S.getCombinedCond()
1831                            : S.getCond();
1832   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1833                              ? S.getCombinedNextLowerBound()
1834                              : S.getNextLowerBound();
1835   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
1836                              ? S.getCombinedNextUpperBound()
1837                              : S.getNextUpperBound();
1838 
1839   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
1840                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
1841                    emitEmptyOrdered);
1842 }
1843 
1844 /// Emit a helper variable and return corresponding lvalue.
1845 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1846                                const DeclRefExpr *Helper) {
1847   auto VDecl = cast<VarDecl>(Helper->getDecl());
1848   CGF.EmitVarDecl(*VDecl);
1849   return CGF.EmitLValue(Helper);
1850 }
1851 
1852 static std::pair<LValue, LValue>
1853 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
1854                                      const OMPExecutableDirective &S) {
1855   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1856   LValue LB =
1857       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
1858   LValue UB =
1859       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
1860 
1861   // When composing 'distribute' with 'for' (e.g. as in 'distribute
1862   // parallel for') we need to use the 'distribute'
1863   // chunk lower and upper bounds rather than the whole loop iteration
1864   // space. These are parameters to the outlined function for 'parallel'
1865   // and we copy the bounds of the previous schedule into the
1866   // the current ones.
1867   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
1868   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
1869   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
1870   PrevLBVal = CGF.EmitScalarConversion(
1871       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
1872       LS.getIterationVariable()->getType(), SourceLocation());
1873   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
1874   PrevUBVal = CGF.EmitScalarConversion(
1875       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
1876       LS.getIterationVariable()->getType(), SourceLocation());
1877 
1878   CGF.EmitStoreOfScalar(PrevLBVal, LB);
1879   CGF.EmitStoreOfScalar(PrevUBVal, UB);
1880 
1881   return {LB, UB};
1882 }
1883 
1884 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
1885 /// we need to use the LB and UB expressions generated by the worksharing
1886 /// code generation support, whereas in non combined situations we would
1887 /// just emit 0 and the LastIteration expression
1888 /// This function is necessary due to the difference of the LB and UB
1889 /// types for the RT emission routines for 'for_static_init' and
1890 /// 'for_dispatch_init'
1891 static std::pair<llvm::Value *, llvm::Value *>
1892 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
1893                                         const OMPExecutableDirective &S,
1894                                         Address LB, Address UB) {
1895   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
1896   const Expr *IVExpr = LS.getIterationVariable();
1897   // when implementing a dynamic schedule for a 'for' combined with a
1898   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
1899   // is not normalized as each team only executes its own assigned
1900   // distribute chunk
1901   QualType IteratorTy = IVExpr->getType();
1902   llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
1903                                             SourceLocation());
1904   llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
1905                                             SourceLocation());
1906   return {LBVal, UBVal};
1907 }
1908 
1909 static void emitDistributeParallelForDistributeInnerBoundParams(
1910     CodeGenFunction &CGF, const OMPExecutableDirective &S,
1911     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
1912   const auto &Dir = cast<OMPLoopDirective>(S);
1913   LValue LB =
1914       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
1915   auto LBCast = CGF.Builder.CreateIntCast(
1916       CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1917   CapturedVars.push_back(LBCast);
1918   LValue UB =
1919       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
1920 
1921   auto UBCast = CGF.Builder.CreateIntCast(
1922       CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
1923   CapturedVars.push_back(UBCast);
1924 }
1925 
1926 static void
1927 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
1928                                  const OMPLoopDirective &S,
1929                                  CodeGenFunction::JumpDest LoopExit) {
1930   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
1931                                          PrePostActionTy &) {
1932     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
1933                                emitDistributeParallelForInnerBounds,
1934                                emitDistributeParallelForDispatchBounds);
1935   };
1936 
1937   emitCommonOMPParallelDirective(
1938       CGF, S, OMPD_for, CGInlinedWorksharingLoop,
1939       emitDistributeParallelForDistributeInnerBoundParams);
1940 }
1941 
1942 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1943     const OMPDistributeParallelForDirective &S) {
1944   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1945     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
1946                               S.getDistInc());
1947   };
1948   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1949   OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
1950                                   /*HasCancel=*/false);
1951   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
1952                                               /*HasCancel=*/false);
1953 }
1954 
1955 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
1956     const OMPDistributeParallelForSimdDirective &S) {
1957   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1958   CGM.getOpenMPRuntime().emitInlinedDirective(
1959       *this, OMPD_distribute_parallel_for_simd,
1960       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1961         OMPLoopScope PreInitScope(CGF, S);
1962         CGF.EmitStmt(
1963             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1964       });
1965 }
1966 
1967 void CodeGenFunction::EmitOMPDistributeSimdDirective(
1968     const OMPDistributeSimdDirective &S) {
1969   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1970   CGM.getOpenMPRuntime().emitInlinedDirective(
1971       *this, OMPD_distribute_simd,
1972       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1973         OMPLoopScope PreInitScope(CGF, S);
1974         CGF.EmitStmt(
1975             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1976       });
1977 }
1978 
1979 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
1980     const OMPTargetParallelForSimdDirective &S) {
1981   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1982   CGM.getOpenMPRuntime().emitInlinedDirective(
1983       *this, OMPD_target_parallel_for_simd,
1984       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1985         OMPLoopScope PreInitScope(CGF, S);
1986         CGF.EmitStmt(
1987             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1988       });
1989 }
1990 
1991 void CodeGenFunction::EmitOMPTargetSimdDirective(
1992     const OMPTargetSimdDirective &S) {
1993   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1994   CGM.getOpenMPRuntime().emitInlinedDirective(
1995       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1996         OMPLoopScope PreInitScope(CGF, S);
1997         CGF.EmitStmt(
1998             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1999       });
2000 }
2001 
2002 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
2003     const OMPTeamsDistributeDirective &S) {
2004   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2005   CGM.getOpenMPRuntime().emitInlinedDirective(
2006       *this, OMPD_teams_distribute,
2007       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2008         OMPLoopScope PreInitScope(CGF, S);
2009         CGF.EmitStmt(
2010             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2011       });
2012 }
2013 
2014 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2015     const OMPTeamsDistributeSimdDirective &S) {
2016   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2017   CGM.getOpenMPRuntime().emitInlinedDirective(
2018       *this, OMPD_teams_distribute_simd,
2019       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2020         OMPLoopScope PreInitScope(CGF, S);
2021         CGF.EmitStmt(
2022             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2023       });
2024 }
2025 
2026 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2027     const OMPTeamsDistributeParallelForSimdDirective &S) {
2028   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2029   CGM.getOpenMPRuntime().emitInlinedDirective(
2030       *this, OMPD_teams_distribute_parallel_for_simd,
2031       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2032         OMPLoopScope PreInitScope(CGF, S);
2033         CGF.EmitStmt(
2034             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2035       });
2036 }
2037 
2038 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2039     const OMPTeamsDistributeParallelForDirective &S) {
2040   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2041   CGM.getOpenMPRuntime().emitInlinedDirective(
2042       *this, OMPD_teams_distribute_parallel_for,
2043       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2044         OMPLoopScope PreInitScope(CGF, S);
2045         CGF.EmitStmt(
2046             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2047       });
2048 }
2049 
2050 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2051     const OMPTargetTeamsDistributeDirective &S) {
2052   CGM.getOpenMPRuntime().emitInlinedDirective(
2053       *this, OMPD_target_teams_distribute,
2054       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2055         CGF.EmitStmt(
2056             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2057       });
2058 }
2059 
2060 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2061     const OMPTargetTeamsDistributeParallelForDirective &S) {
2062   CGM.getOpenMPRuntime().emitInlinedDirective(
2063       *this, OMPD_target_teams_distribute_parallel_for,
2064       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2065         CGF.EmitStmt(
2066             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2067       });
2068 }
2069 
2070 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2071     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2072   CGM.getOpenMPRuntime().emitInlinedDirective(
2073       *this, OMPD_target_teams_distribute_parallel_for_simd,
2074       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2075         CGF.EmitStmt(
2076             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2077       });
2078 }
2079 
2080 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2081     const OMPTargetTeamsDistributeSimdDirective &S) {
2082   CGM.getOpenMPRuntime().emitInlinedDirective(
2083       *this, OMPD_target_teams_distribute_simd,
2084       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2085         CGF.EmitStmt(
2086             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2087       });
2088 }
2089 
2090 namespace {
2091   struct ScheduleKindModifiersTy {
2092     OpenMPScheduleClauseKind Kind;
2093     OpenMPScheduleClauseModifier M1;
2094     OpenMPScheduleClauseModifier M2;
2095     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2096                             OpenMPScheduleClauseModifier M1,
2097                             OpenMPScheduleClauseModifier M2)
2098         : Kind(Kind), M1(M1), M2(M2) {}
2099   };
2100 } // namespace
2101 
2102 bool CodeGenFunction::EmitOMPWorksharingLoop(
2103     const OMPLoopDirective &S, Expr *EUB,
2104     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2105     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2106   // Emit the loop iteration variable.
2107   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2108   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2109   EmitVarDecl(*IVDecl);
2110 
2111   // Emit the iterations count variable.
2112   // If it is not a variable, Sema decided to calculate iterations count on each
2113   // iteration (e.g., it is foldable into a constant).
2114   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2115     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2116     // Emit calculation of the iterations count.
2117     EmitIgnoredExpr(S.getCalcLastIteration());
2118   }
2119 
2120   auto &RT = CGM.getOpenMPRuntime();
2121 
2122   bool HasLastprivateClause;
2123   // Check pre-condition.
2124   {
2125     OMPLoopScope PreInitScope(*this, S);
2126     // Skip the entire loop if we don't meet the precondition.
2127     // If the condition constant folds and can be elided, avoid emitting the
2128     // whole loop.
2129     bool CondConstant;
2130     llvm::BasicBlock *ContBlock = nullptr;
2131     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2132       if (!CondConstant)
2133         return false;
2134     } else {
2135       auto *ThenBlock = createBasicBlock("omp.precond.then");
2136       ContBlock = createBasicBlock("omp.precond.end");
2137       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2138                   getProfileCount(&S));
2139       EmitBlock(ThenBlock);
2140       incrementProfileCounter(&S);
2141     }
2142 
2143     bool Ordered = false;
2144     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2145       if (OrderedClause->getNumForLoops())
2146         RT.emitDoacrossInit(*this, S);
2147       else
2148         Ordered = true;
2149     }
2150 
2151     llvm::DenseSet<const Expr *> EmittedFinals;
2152     emitAlignedClause(*this, S);
2153     bool HasLinears = EmitOMPLinearClauseInit(S);
2154     // Emit helper vars inits.
2155 
2156     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2157     LValue LB = Bounds.first;
2158     LValue UB = Bounds.second;
2159     LValue ST =
2160         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2161     LValue IL =
2162         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2163 
2164     // Emit 'then' code.
2165     {
2166       OMPPrivateScope LoopScope(*this);
2167       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2168         // Emit implicit barrier to synchronize threads and avoid data races on
2169         // initialization of firstprivate variables and post-update of
2170         // lastprivate variables.
2171         CGM.getOpenMPRuntime().emitBarrierCall(
2172             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2173             /*ForceSimpleCall=*/true);
2174       }
2175       EmitOMPPrivateClause(S, LoopScope);
2176       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2177       EmitOMPReductionClauseInit(S, LoopScope);
2178       EmitOMPPrivateLoopCounters(S, LoopScope);
2179       EmitOMPLinearClause(S, LoopScope);
2180       (void)LoopScope.Privatize();
2181 
2182       // Detect the loop schedule kind and chunk.
2183       llvm::Value *Chunk = nullptr;
2184       OpenMPScheduleTy ScheduleKind;
2185       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2186         ScheduleKind.Schedule = C->getScheduleKind();
2187         ScheduleKind.M1 = C->getFirstScheduleModifier();
2188         ScheduleKind.M2 = C->getSecondScheduleModifier();
2189         if (const auto *Ch = C->getChunkSize()) {
2190           Chunk = EmitScalarExpr(Ch);
2191           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2192                                        S.getIterationVariable()->getType(),
2193                                        S.getLocStart());
2194         }
2195       }
2196       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2197       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2198       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2199       // If the static schedule kind is specified or if the ordered clause is
2200       // specified, and if no monotonic modifier is specified, the effect will
2201       // be as if the monotonic modifier was specified.
2202       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2203                                 /* Chunked */ Chunk != nullptr) &&
2204           !Ordered) {
2205         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2206           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2207         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2208         // When no chunk_size is specified, the iteration space is divided into
2209         // chunks that are approximately equal in size, and at most one chunk is
2210         // distributed to each thread. Note that the size of the chunks is
2211         // unspecified in this case.
2212         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
2213                              IVSize, IVSigned, Ordered,
2214                              IL.getAddress(), LB.getAddress(),
2215                              UB.getAddress(), ST.getAddress());
2216         auto LoopExit =
2217             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2218         // UB = min(UB, GlobalUB);
2219         EmitIgnoredExpr(S.getEnsureUpperBound());
2220         // IV = LB;
2221         EmitIgnoredExpr(S.getInit());
2222         // while (idx <= UB) { BODY; ++idx; }
2223         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2224                          S.getInc(),
2225                          [&S, LoopExit](CodeGenFunction &CGF) {
2226                            CGF.EmitOMPLoopBody(S, LoopExit);
2227                            CGF.EmitStopPoint(&S);
2228                          },
2229                          [](CodeGenFunction &) {});
2230         EmitBlock(LoopExit.getBlock());
2231         // Tell the runtime we are done.
2232         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2233           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
2234         };
2235         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2236       } else {
2237         const bool IsMonotonic =
2238             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2239             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2240             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2241             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2242         // Emit the outer loop, which requests its work chunk [LB..UB] from
2243         // runtime and runs the inner loop to process it.
2244         const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2245                                              ST.getAddress(), IL.getAddress(),
2246                                              Chunk, EUB);
2247         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2248                             LoopArguments, CGDispatchBounds);
2249       }
2250       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2251         EmitOMPSimdFinal(S,
2252                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2253                            return CGF.Builder.CreateIsNotNull(
2254                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2255                          });
2256       }
2257       EmitOMPReductionClauseFinal(
2258           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2259                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2260                  : /*Parallel only*/ OMPD_parallel);
2261       // Emit post-update of the reduction variables if IsLastIter != 0.
2262       emitPostUpdateForReductionClause(
2263           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2264             return CGF.Builder.CreateIsNotNull(
2265                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2266           });
2267       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2268       if (HasLastprivateClause)
2269         EmitOMPLastprivateClauseFinal(
2270             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2271             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2272     }
2273     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2274       return CGF.Builder.CreateIsNotNull(
2275           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2276     });
2277     // We're now done with the loop, so jump to the continuation block.
2278     if (ContBlock) {
2279       EmitBranch(ContBlock);
2280       EmitBlock(ContBlock, true);
2281     }
2282   }
2283   return HasLastprivateClause;
2284 }
2285 
2286 /// The following two functions generate expressions for the loop lower
2287 /// and upper bounds in case of static and dynamic (dispatch) schedule
2288 /// of the associated 'for' or 'distribute' loop.
2289 static std::pair<LValue, LValue>
2290 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2291   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2292   LValue LB =
2293       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2294   LValue UB =
2295       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2296   return {LB, UB};
2297 }
2298 
2299 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2300 /// consider the lower and upper bound expressions generated by the
2301 /// worksharing loop support, but we use 0 and the iteration space size as
2302 /// constants
2303 static std::pair<llvm::Value *, llvm::Value *>
2304 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2305                           Address LB, Address UB) {
2306   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2307   const Expr *IVExpr = LS.getIterationVariable();
2308   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2309   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2310   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2311   return {LBVal, UBVal};
2312 }
2313 
2314 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2315   bool HasLastprivates = false;
2316   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2317                                           PrePostActionTy &) {
2318     OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2319     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2320                                                  emitForLoopBounds,
2321                                                  emitDispatchForLoopBounds);
2322   };
2323   {
2324     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2325     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2326                                                 S.hasCancel());
2327   }
2328 
2329   // Emit an implicit barrier at the end.
2330   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2331     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2332   }
2333 }
2334 
2335 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2336   bool HasLastprivates = false;
2337   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2338                                           PrePostActionTy &) {
2339     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2340                                                  emitForLoopBounds,
2341                                                  emitDispatchForLoopBounds);
2342   };
2343   {
2344     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2345     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2346   }
2347 
2348   // Emit an implicit barrier at the end.
2349   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2350     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2351   }
2352 }
2353 
2354 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2355                                 const Twine &Name,
2356                                 llvm::Value *Init = nullptr) {
2357   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2358   if (Init)
2359     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2360   return LVal;
2361 }
2362 
2363 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2364   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
2365   auto *CS = dyn_cast<CompoundStmt>(Stmt);
2366   bool HasLastprivates = false;
2367   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
2368                                                     PrePostActionTy &) {
2369     auto &C = CGF.CGM.getContext();
2370     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2371     // Emit helper vars inits.
2372     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2373                                   CGF.Builder.getInt32(0));
2374     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2375                                       : CGF.Builder.getInt32(0);
2376     LValue UB =
2377         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2378     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2379                                   CGF.Builder.getInt32(1));
2380     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2381                                   CGF.Builder.getInt32(0));
2382     // Loop counter.
2383     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2384     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2385     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2386     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2387     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2388     // Generate condition for loop.
2389     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2390                         OK_Ordinary, S.getLocStart(), FPOptions());
2391     // Increment for loop counter.
2392     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2393                       S.getLocStart());
2394     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2395       // Iterate through all sections and emit a switch construct:
2396       // switch (IV) {
2397       //   case 0:
2398       //     <SectionStmt[0]>;
2399       //     break;
2400       // ...
2401       //   case <NumSection> - 1:
2402       //     <SectionStmt[<NumSection> - 1]>;
2403       //     break;
2404       // }
2405       // .omp.sections.exit:
2406       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2407       auto *SwitchStmt = CGF.Builder.CreateSwitch(
2408           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2409           CS == nullptr ? 1 : CS->size());
2410       if (CS) {
2411         unsigned CaseNumber = 0;
2412         for (auto *SubStmt : CS->children()) {
2413           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2414           CGF.EmitBlock(CaseBB);
2415           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2416           CGF.EmitStmt(SubStmt);
2417           CGF.EmitBranch(ExitBB);
2418           ++CaseNumber;
2419         }
2420       } else {
2421         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2422         CGF.EmitBlock(CaseBB);
2423         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2424         CGF.EmitStmt(Stmt);
2425         CGF.EmitBranch(ExitBB);
2426       }
2427       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2428     };
2429 
2430     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2431     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2432       // Emit implicit barrier to synchronize threads and avoid data races on
2433       // initialization of firstprivate variables and post-update of lastprivate
2434       // variables.
2435       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2436           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2437           /*ForceSimpleCall=*/true);
2438     }
2439     CGF.EmitOMPPrivateClause(S, LoopScope);
2440     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2441     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2442     (void)LoopScope.Privatize();
2443 
2444     // Emit static non-chunked loop.
2445     OpenMPScheduleTy ScheduleKind;
2446     ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2447     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2448         CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
2449         /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
2450         UB.getAddress(), ST.getAddress());
2451     // UB = min(UB, GlobalUB);
2452     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2453     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2454         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2455     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2456     // IV = LB;
2457     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2458     // while (idx <= UB) { BODY; ++idx; }
2459     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2460                          [](CodeGenFunction &) {});
2461     // Tell the runtime we are done.
2462     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2463       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
2464     };
2465     CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2466     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2467     // Emit post-update of the reduction variables if IsLastIter != 0.
2468     emitPostUpdateForReductionClause(
2469         CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2470           return CGF.Builder.CreateIsNotNull(
2471               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2472         });
2473 
2474     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2475     if (HasLastprivates)
2476       CGF.EmitOMPLastprivateClauseFinal(
2477           S, /*NoFinals=*/false,
2478           CGF.Builder.CreateIsNotNull(
2479               CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2480   };
2481 
2482   bool HasCancel = false;
2483   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2484     HasCancel = OSD->hasCancel();
2485   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2486     HasCancel = OPSD->hasCancel();
2487   OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2488   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2489                                               HasCancel);
2490   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2491   // clause. Otherwise the barrier will be generated by the codegen for the
2492   // directive.
2493   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2494     // Emit implicit barrier to synchronize threads and avoid data races on
2495     // initialization of firstprivate variables.
2496     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2497                                            OMPD_unknown);
2498   }
2499 }
2500 
2501 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2502   {
2503     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2504     EmitSections(S);
2505   }
2506   // Emit an implicit barrier at the end.
2507   if (!S.getSingleClause<OMPNowaitClause>()) {
2508     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2509                                            OMPD_sections);
2510   }
2511 }
2512 
2513 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2514   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2515     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2516   };
2517   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2518   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2519                                               S.hasCancel());
2520 }
2521 
2522 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2523   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2524   llvm::SmallVector<const Expr *, 8> DestExprs;
2525   llvm::SmallVector<const Expr *, 8> SrcExprs;
2526   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2527   // Check if there are any 'copyprivate' clauses associated with this
2528   // 'single' construct.
2529   // Build a list of copyprivate variables along with helper expressions
2530   // (<source>, <destination>, <destination>=<source> expressions)
2531   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2532     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2533     DestExprs.append(C->destination_exprs().begin(),
2534                      C->destination_exprs().end());
2535     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2536     AssignmentOps.append(C->assignment_ops().begin(),
2537                          C->assignment_ops().end());
2538   }
2539   // Emit code for 'single' region along with 'copyprivate' clauses
2540   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2541     Action.Enter(CGF);
2542     OMPPrivateScope SingleScope(CGF);
2543     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2544     CGF.EmitOMPPrivateClause(S, SingleScope);
2545     (void)SingleScope.Privatize();
2546     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2547   };
2548   {
2549     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2550     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2551                                             CopyprivateVars, DestExprs,
2552                                             SrcExprs, AssignmentOps);
2553   }
2554   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2555   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2556   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2557     CGM.getOpenMPRuntime().emitBarrierCall(
2558         *this, S.getLocStart(),
2559         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2560   }
2561 }
2562 
2563 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2564   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2565     Action.Enter(CGF);
2566     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2567   };
2568   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2569   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2570 }
2571 
2572 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2573   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2574     Action.Enter(CGF);
2575     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2576   };
2577   Expr *Hint = nullptr;
2578   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2579     Hint = HintClause->getHint();
2580   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2581   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2582                                             S.getDirectiveName().getAsString(),
2583                                             CodeGen, S.getLocStart(), Hint);
2584 }
2585 
2586 void CodeGenFunction::EmitOMPParallelForDirective(
2587     const OMPParallelForDirective &S) {
2588   // Emit directive as a combined directive that consists of two implicit
2589   // directives: 'parallel' with 'for' directive.
2590   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2591     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2592     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2593                                emitDispatchForLoopBounds);
2594   };
2595   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2596                                  emitEmptyBoundParameters);
2597 }
2598 
2599 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2600     const OMPParallelForSimdDirective &S) {
2601   // Emit directive as a combined directive that consists of two implicit
2602   // directives: 'parallel' with 'for' directive.
2603   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2604     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2605                                emitDispatchForLoopBounds);
2606   };
2607   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2608                                  emitEmptyBoundParameters);
2609 }
2610 
2611 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2612     const OMPParallelSectionsDirective &S) {
2613   // Emit directive as a combined directive that consists of two implicit
2614   // directives: 'parallel' with 'sections' directive.
2615   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2616     CGF.EmitSections(S);
2617   };
2618   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2619                                  emitEmptyBoundParameters);
2620 }
2621 
2622 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2623                                                 const RegionCodeGenTy &BodyGen,
2624                                                 const TaskGenTy &TaskGen,
2625                                                 OMPTaskDataTy &Data) {
2626   // Emit outlined function for task construct.
2627   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2628   auto *I = CS->getCapturedDecl()->param_begin();
2629   auto *PartId = std::next(I);
2630   auto *TaskT = std::next(I, 4);
2631   // Check if the task is final
2632   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2633     // If the condition constant folds and can be elided, try to avoid emitting
2634     // the condition and the dead arm of the if/else.
2635     auto *Cond = Clause->getCondition();
2636     bool CondConstant;
2637     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2638       Data.Final.setInt(CondConstant);
2639     else
2640       Data.Final.setPointer(EvaluateExprAsBool(Cond));
2641   } else {
2642     // By default the task is not final.
2643     Data.Final.setInt(/*IntVal=*/false);
2644   }
2645   // Check if the task has 'priority' clause.
2646   if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2647     auto *Prio = Clause->getPriority();
2648     Data.Priority.setInt(/*IntVal=*/true);
2649     Data.Priority.setPointer(EmitScalarConversion(
2650         EmitScalarExpr(Prio), Prio->getType(),
2651         getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2652         Prio->getExprLoc()));
2653   }
2654   // The first function argument for tasks is a thread id, the second one is a
2655   // part id (0 for tied tasks, >=0 for untied task).
2656   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2657   // Get list of private variables.
2658   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2659     auto IRef = C->varlist_begin();
2660     for (auto *IInit : C->private_copies()) {
2661       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2662       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2663         Data.PrivateVars.push_back(*IRef);
2664         Data.PrivateCopies.push_back(IInit);
2665       }
2666       ++IRef;
2667     }
2668   }
2669   EmittedAsPrivate.clear();
2670   // Get list of firstprivate variables.
2671   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2672     auto IRef = C->varlist_begin();
2673     auto IElemInitRef = C->inits().begin();
2674     for (auto *IInit : C->private_copies()) {
2675       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2676       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2677         Data.FirstprivateVars.push_back(*IRef);
2678         Data.FirstprivateCopies.push_back(IInit);
2679         Data.FirstprivateInits.push_back(*IElemInitRef);
2680       }
2681       ++IRef;
2682       ++IElemInitRef;
2683     }
2684   }
2685   // Get list of lastprivate variables (for taskloops).
2686   llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2687   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2688     auto IRef = C->varlist_begin();
2689     auto ID = C->destination_exprs().begin();
2690     for (auto *IInit : C->private_copies()) {
2691       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2692       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2693         Data.LastprivateVars.push_back(*IRef);
2694         Data.LastprivateCopies.push_back(IInit);
2695       }
2696       LastprivateDstsOrigs.insert(
2697           {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2698            cast<DeclRefExpr>(*IRef)});
2699       ++IRef;
2700       ++ID;
2701     }
2702   }
2703   SmallVector<const Expr *, 4> LHSs;
2704   SmallVector<const Expr *, 4> RHSs;
2705   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
2706     auto IPriv = C->privates().begin();
2707     auto IRed = C->reduction_ops().begin();
2708     auto ILHS = C->lhs_exprs().begin();
2709     auto IRHS = C->rhs_exprs().begin();
2710     for (const auto *Ref : C->varlists()) {
2711       Data.ReductionVars.emplace_back(Ref);
2712       Data.ReductionCopies.emplace_back(*IPriv);
2713       Data.ReductionOps.emplace_back(*IRed);
2714       LHSs.emplace_back(*ILHS);
2715       RHSs.emplace_back(*IRHS);
2716       std::advance(IPriv, 1);
2717       std::advance(IRed, 1);
2718       std::advance(ILHS, 1);
2719       std::advance(IRHS, 1);
2720     }
2721   }
2722   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
2723       *this, S.getLocStart(), LHSs, RHSs, Data);
2724   // Build list of dependences.
2725   for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2726     for (auto *IRef : C->varlists())
2727       Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2728   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
2729       CodeGenFunction &CGF, PrePostActionTy &Action) {
2730     // Set proper addresses for generated private copies.
2731     OMPPrivateScope Scope(CGF);
2732     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
2733         !Data.LastprivateVars.empty()) {
2734       auto *CopyFn = CGF.Builder.CreateLoad(
2735           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2736       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2737           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2738       // Map privates.
2739       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
2740       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2741       CallArgs.push_back(PrivatesPtr);
2742       for (auto *E : Data.PrivateVars) {
2743         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2744         Address PrivatePtr = CGF.CreateMemTemp(
2745             CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
2746         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2747         CallArgs.push_back(PrivatePtr.getPointer());
2748       }
2749       for (auto *E : Data.FirstprivateVars) {
2750         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2751         Address PrivatePtr =
2752             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2753                               ".firstpriv.ptr.addr");
2754         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2755         CallArgs.push_back(PrivatePtr.getPointer());
2756       }
2757       for (auto *E : Data.LastprivateVars) {
2758         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2759         Address PrivatePtr =
2760             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
2761                               ".lastpriv.ptr.addr");
2762         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2763         CallArgs.push_back(PrivatePtr.getPointer());
2764       }
2765       CGF.EmitRuntimeCall(CopyFn, CallArgs);
2766       for (auto &&Pair : LastprivateDstsOrigs) {
2767         auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
2768         DeclRefExpr DRE(
2769             const_cast<VarDecl *>(OrigVD),
2770             /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
2771                 OrigVD) != nullptr,
2772             Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
2773         Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
2774           return CGF.EmitLValue(&DRE).getAddress();
2775         });
2776       }
2777       for (auto &&Pair : PrivatePtrs) {
2778         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2779                             CGF.getContext().getDeclAlign(Pair.first));
2780         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2781       }
2782     }
2783     if (Data.Reductions) {
2784       OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
2785       ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
2786                              Data.ReductionOps);
2787       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
2788           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
2789       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
2790         RedCG.emitSharedLValue(CGF, Cnt);
2791         RedCG.emitAggregateType(CGF, Cnt);
2792         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
2793             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
2794         Replacement =
2795             Address(CGF.EmitScalarConversion(
2796                         Replacement.getPointer(), CGF.getContext().VoidPtrTy,
2797                         CGF.getContext().getPointerType(
2798                             Data.ReductionCopies[Cnt]->getType()),
2799                         SourceLocation()),
2800                     Replacement.getAlignment());
2801         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
2802         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
2803                          [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
2807         CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
2808                                                            RedCG, Cnt);
2809       }
2810     }
2811     (void)Scope.Privatize();
2812 
2813     Action.Enter(CGF);
2814     BodyGen(CGF);
2815   };
2816   auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2817       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
2818       Data.NumberOfParts);
2819   OMPLexicalScope Scope(*this, S);
2820   TaskGen(*this, OutlinedFn, Data);
2821 }
2822 
2823 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2824   // Emit outlined function for task construct.
2825   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2826   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2827   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2828   const Expr *IfCond = nullptr;
2829   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2830     if (C->getNameModifier() == OMPD_unknown ||
2831         C->getNameModifier() == OMPD_task) {
2832       IfCond = C->getCondition();
2833       break;
2834     }
2835   }
2836 
2837   OMPTaskDataTy Data;
2838   // Check if we should emit tied or untied task.
2839   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2840   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2841     CGF.EmitStmt(CS->getCapturedStmt());
2842   };
2843   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2844                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2845                             const OMPTaskDataTy &Data) {
2846     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2847                                             SharedsTy, CapturedStruct, IfCond,
2848                                             Data);
2849   };
2850   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2851 }
2852 
2853 void CodeGenFunction::EmitOMPTaskyieldDirective(
2854     const OMPTaskyieldDirective &S) {
2855   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2856 }
2857 
2858 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2859   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2860 }
2861 
2862 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2863   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2864 }
2865 
2866 void CodeGenFunction::EmitOMPTaskgroupDirective(
2867     const OMPTaskgroupDirective &S) {
2868   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2869     Action.Enter(CGF);
2870     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2871   };
2872   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2873   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2874 }
2875 
2876 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2877   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2878     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2879       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2880                                 FlushClause->varlist_end());
2881     }
2882     return llvm::None;
2883   }(), S.getLocStart());
2884 }
2885 
2886 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
2887                                             const CodeGenLoopTy &CodeGenLoop,
2888                                             Expr *IncExpr) {
2889   // Emit the loop iteration variable.
2890   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2891   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2892   EmitVarDecl(*IVDecl);
2893 
2894   // Emit the iterations count variable.
2895   // If it is not a variable, Sema decided to calculate iterations count on each
2896   // iteration (e.g., it is foldable into a constant).
2897   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2898     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2899     // Emit calculation of the iterations count.
2900     EmitIgnoredExpr(S.getCalcLastIteration());
2901   }
2902 
2903   auto &RT = CGM.getOpenMPRuntime();
2904 
2905   bool HasLastprivateClause = false;
2906   // Check pre-condition.
2907   {
2908     OMPLoopScope PreInitScope(*this, S);
2909     // Skip the entire loop if we don't meet the precondition.
2910     // If the condition constant folds and can be elided, avoid emitting the
2911     // whole loop.
2912     bool CondConstant;
2913     llvm::BasicBlock *ContBlock = nullptr;
2914     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2915       if (!CondConstant)
2916         return;
2917     } else {
2918       auto *ThenBlock = createBasicBlock("omp.precond.then");
2919       ContBlock = createBasicBlock("omp.precond.end");
2920       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2921                   getProfileCount(&S));
2922       EmitBlock(ThenBlock);
2923       incrementProfileCounter(&S);
2924     }
2925 
2926     // Emit 'then' code.
2927     {
2928       // Emit helper vars inits.
2929 
2930       LValue LB = EmitOMPHelperVar(
2931           *this, cast<DeclRefExpr>(
2932                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2933                           ? S.getCombinedLowerBoundVariable()
2934                           : S.getLowerBoundVariable())));
2935       LValue UB = EmitOMPHelperVar(
2936           *this, cast<DeclRefExpr>(
2937                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2938                           ? S.getCombinedUpperBoundVariable()
2939                           : S.getUpperBoundVariable())));
2940       LValue ST =
2941           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2942       LValue IL =
2943           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2944 
2945       OMPPrivateScope LoopScope(*this);
2946       if (EmitOMPFirstprivateClause(S, LoopScope)) {
2947         // Emit implicit barrier to synchronize threads and avoid data races on
2948         // initialization of firstprivate variables and post-update of
2949         // lastprivate variables.
2950         CGM.getOpenMPRuntime().emitBarrierCall(
2951           *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2952           /*ForceSimpleCall=*/true);
2953       }
2954       EmitOMPPrivateClause(S, LoopScope);
2955       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2956       EmitOMPPrivateLoopCounters(S, LoopScope);
2957       (void)LoopScope.Privatize();
2958 
2959       // Detect the distribute schedule kind and chunk.
2960       llvm::Value *Chunk = nullptr;
2961       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
2962       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
2963         ScheduleKind = C->getDistScheduleKind();
2964         if (const auto *Ch = C->getChunkSize()) {
2965           Chunk = EmitScalarExpr(Ch);
2966           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2967           S.getIterationVariable()->getType(),
2968           S.getLocStart());
2969         }
2970       }
2971       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2972       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2973 
2974       // OpenMP [2.10.8, distribute Construct, Description]
2975       // If dist_schedule is specified, kind must be static. If specified,
2976       // iterations are divided into chunks of size chunk_size, chunks are
2977       // assigned to the teams of the league in a round-robin fashion in the
2978       // order of the team number. When no chunk_size is specified, the
2979       // iteration space is divided into chunks that are approximately equal
2980       // in size, and at most one chunk is distributed to each team of the
2981       // league. The size of the chunks is unspecified in this case.
2982       if (RT.isStaticNonchunked(ScheduleKind,
2983                                 /* Chunked */ Chunk != nullptr)) {
2984         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
2985                              IVSize, IVSigned, /* Ordered = */ false,
2986                              IL.getAddress(), LB.getAddress(),
2987                              UB.getAddress(), ST.getAddress());
2988         auto LoopExit =
2989             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2990         // UB = min(UB, GlobalUB);
2991         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2992                             ? S.getCombinedEnsureUpperBound()
2993                             : S.getEnsureUpperBound());
2994         // IV = LB;
2995         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2996                             ? S.getCombinedInit()
2997                             : S.getInit());
2998 
2999         Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3000                          ? S.getCombinedCond()
3001                          : S.getCond();
3002 
3003         // for distribute alone,  codegen
3004         // while (idx <= UB) { BODY; ++idx; }
3005         // when combined with 'for' (e.g. as in 'distribute parallel for')
3006         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3007         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3008                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3009                            CodeGenLoop(CGF, S, LoopExit);
3010                          },
3011                          [](CodeGenFunction &) {});
3012         EmitBlock(LoopExit.getBlock());
3013         // Tell the runtime we are done.
3014         RT.emitForStaticFinish(*this, S.getLocStart());
3015       } else {
3016         // Emit the outer loop, which requests its work chunk [LB..UB] from
3017         // runtime and runs the inner loop to process it.
3018         const OMPLoopArguments LoopArguments = {
3019             LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3020             Chunk};
3021         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3022                                    CodeGenLoop);
3023       }
3024 
3025       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3026       if (HasLastprivateClause)
3027         EmitOMPLastprivateClauseFinal(
3028             S, /*NoFinals=*/false,
3029             Builder.CreateIsNotNull(
3030                 EmitLoadOfScalar(IL, S.getLocStart())));
3031     }
3032 
3033     // We're now done with the loop, so jump to the continuation block.
3034     if (ContBlock) {
3035       EmitBranch(ContBlock);
3036       EmitBlock(ContBlock, true);
3037     }
3038   }
3039 }
3040 
3041 void CodeGenFunction::EmitOMPDistributeDirective(
3042     const OMPDistributeDirective &S) {
3043   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3044 
3045     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3046   };
3047   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3048   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
3049                                               false);
3050 }
3051 
3052 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3053                                                    const CapturedStmt *S) {
3054   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3055   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3056   CGF.CapturedStmtInfo = &CapStmtInfo;
3057   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3058   Fn->addFnAttr(llvm::Attribute::NoInline);
3059   return Fn;
3060 }
3061 
3062 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3063   if (!S.getAssociatedStmt()) {
3064     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3065       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3066     return;
3067   }
3068   auto *C = S.getSingleClause<OMPSIMDClause>();
3069   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3070                                  PrePostActionTy &Action) {
3071     if (C) {
3072       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3073       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3074       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3075       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3076       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
3077     } else {
3078       Action.Enter(CGF);
3079       CGF.EmitStmt(
3080           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3081     }
3082   };
3083   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3084   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
3085 }
3086 
3087 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3088                                          QualType SrcType, QualType DestType,
3089                                          SourceLocation Loc) {
3090   assert(CGF.hasScalarEvaluationKind(DestType) &&
3091          "DestType must have scalar evaluation kind.");
3092   assert(!Val.isAggregate() && "Must be a scalar or complex.");
3093   return Val.isScalar()
3094              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3095                                         Loc)
3096              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3097                                                  DestType, Loc);
3098 }
3099 
3100 static CodeGenFunction::ComplexPairTy
3101 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3102                       QualType DestType, SourceLocation Loc) {
3103   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3104          "DestType must have complex evaluation kind.");
3105   CodeGenFunction::ComplexPairTy ComplexVal;
3106   if (Val.isScalar()) {
3107     // Convert the input element to the element type of the complex.
3108     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3109     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3110                                               DestElementType, Loc);
3111     ComplexVal = CodeGenFunction::ComplexPairTy(
3112         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3113   } else {
3114     assert(Val.isComplex() && "Must be a scalar or complex.");
3115     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3116     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3117     ComplexVal.first = CGF.EmitScalarConversion(
3118         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3119     ComplexVal.second = CGF.EmitScalarConversion(
3120         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3121   }
3122   return ComplexVal;
3123 }
3124 
3125 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3126                                   LValue LVal, RValue RVal) {
3127   if (LVal.isGlobalReg()) {
3128     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3129   } else {
3130     CGF.EmitAtomicStore(RVal, LVal,
3131                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3132                                  : llvm::AtomicOrdering::Monotonic,
3133                         LVal.isVolatile(), /*IsInit=*/false);
3134   }
3135 }
3136 
3137 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3138                                          QualType RValTy, SourceLocation Loc) {
3139   switch (getEvaluationKind(LVal.getType())) {
3140   case TEK_Scalar:
3141     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3142                                *this, RVal, RValTy, LVal.getType(), Loc)),
3143                            LVal);
3144     break;
3145   case TEK_Complex:
3146     EmitStoreOfComplex(
3147         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3148         /*isInit=*/false);
3149     break;
3150   case TEK_Aggregate:
3151     llvm_unreachable("Must be a scalar or complex.");
3152   }
3153 }
3154 
3155 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3156                                   const Expr *X, const Expr *V,
3157                                   SourceLocation Loc) {
3158   // v = x;
3159   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3160   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3161   LValue XLValue = CGF.EmitLValue(X);
3162   LValue VLValue = CGF.EmitLValue(V);
3163   RValue Res = XLValue.isGlobalReg()
3164                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
3165                    : CGF.EmitAtomicLoad(
3166                          XLValue, Loc,
3167                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3168                                   : llvm::AtomicOrdering::Monotonic,
3169                          XLValue.isVolatile());
3170   // OpenMP, 2.12.6, atomic Construct
3171   // Any atomic construct with a seq_cst clause forces the atomically
3172   // performed operation to include an implicit flush operation without a
3173   // list.
3174   if (IsSeqCst)
3175     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3176   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3177 }
3178 
3179 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3180                                    const Expr *X, const Expr *E,
3181                                    SourceLocation Loc) {
3182   // x = expr;
3183   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3184   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3185   // OpenMP, 2.12.6, atomic Construct
3186   // Any atomic construct with a seq_cst clause forces the atomically
3187   // performed operation to include an implicit flush operation without a
3188   // list.
3189   if (IsSeqCst)
3190     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3191 }
3192 
3193 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3194                                                 RValue Update,
3195                                                 BinaryOperatorKind BO,
3196                                                 llvm::AtomicOrdering AO,
3197                                                 bool IsXLHSInRHSPart) {
3198   auto &Context = CGF.CGM.getContext();
3199   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3200   // expression is simple and atomic is allowed for the given type for the
3201   // target platform.
3202   if (BO == BO_Comma || !Update.isScalar() ||
3203       !Update.getScalarVal()->getType()->isIntegerTy() ||
3204       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3205                         (Update.getScalarVal()->getType() !=
3206                          X.getAddress().getElementType())) ||
3207       !X.getAddress().getElementType()->isIntegerTy() ||
3208       !Context.getTargetInfo().hasBuiltinAtomic(
3209           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3210     return std::make_pair(false, RValue::get(nullptr));
3211 
3212   llvm::AtomicRMWInst::BinOp RMWOp;
3213   switch (BO) {
3214   case BO_Add:
3215     RMWOp = llvm::AtomicRMWInst::Add;
3216     break;
3217   case BO_Sub:
3218     if (!IsXLHSInRHSPart)
3219       return std::make_pair(false, RValue::get(nullptr));
3220     RMWOp = llvm::AtomicRMWInst::Sub;
3221     break;
3222   case BO_And:
3223     RMWOp = llvm::AtomicRMWInst::And;
3224     break;
3225   case BO_Or:
3226     RMWOp = llvm::AtomicRMWInst::Or;
3227     break;
3228   case BO_Xor:
3229     RMWOp = llvm::AtomicRMWInst::Xor;
3230     break;
3231   case BO_LT:
3232     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3233                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3234                                    : llvm::AtomicRMWInst::Max)
3235                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3236                                    : llvm::AtomicRMWInst::UMax);
3237     break;
3238   case BO_GT:
3239     RMWOp = X.getType()->hasSignedIntegerRepresentation()
3240                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3241                                    : llvm::AtomicRMWInst::Min)
3242                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3243                                    : llvm::AtomicRMWInst::UMin);
3244     break;
3245   case BO_Assign:
3246     RMWOp = llvm::AtomicRMWInst::Xchg;
3247     break;
3248   case BO_Mul:
3249   case BO_Div:
3250   case BO_Rem:
3251   case BO_Shl:
3252   case BO_Shr:
3253   case BO_LAnd:
3254   case BO_LOr:
3255     return std::make_pair(false, RValue::get(nullptr));
3256   case BO_PtrMemD:
3257   case BO_PtrMemI:
3258   case BO_LE:
3259   case BO_GE:
3260   case BO_EQ:
3261   case BO_NE:
3262   case BO_AddAssign:
3263   case BO_SubAssign:
3264   case BO_AndAssign:
3265   case BO_OrAssign:
3266   case BO_XorAssign:
3267   case BO_MulAssign:
3268   case BO_DivAssign:
3269   case BO_RemAssign:
3270   case BO_ShlAssign:
3271   case BO_ShrAssign:
3272   case BO_Comma:
3273     llvm_unreachable("Unsupported atomic update operation");
3274   }
3275   auto *UpdateVal = Update.getScalarVal();
3276   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3277     UpdateVal = CGF.Builder.CreateIntCast(
3278         IC, X.getAddress().getElementType(),
3279         X.getType()->hasSignedIntegerRepresentation());
3280   }
3281   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3282   return std::make_pair(true, RValue::get(Res));
3283 }
3284 
3285 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3286     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3287     llvm::AtomicOrdering AO, SourceLocation Loc,
3288     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3289   // Update expressions are allowed to have the following forms:
3290   // x binop= expr; -> xrval + expr;
3291   // x++, ++x -> xrval + 1;
3292   // x--, --x -> xrval - 1;
3293   // x = x binop expr; -> xrval binop expr
3294   // x = expr Op x; - > expr binop xrval;
3295   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3296   if (!Res.first) {
3297     if (X.isGlobalReg()) {
3298       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3299       // 'xrval'.
3300       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3301     } else {
3302       // Perform compare-and-swap procedure.
3303       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3304     }
3305   }
3306   return Res;
3307 }
3308 
3309 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3310                                     const Expr *X, const Expr *E,
3311                                     const Expr *UE, bool IsXLHSInRHSPart,
3312                                     SourceLocation Loc) {
3313   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3314          "Update expr in 'atomic update' must be a binary operator.");
3315   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3316   // Update expressions are allowed to have the following forms:
3317   // x binop= expr; -> xrval + expr;
3318   // x++, ++x -> xrval + 1;
3319   // x--, --x -> xrval - 1;
3320   // x = x binop expr; -> xrval binop expr
3321   // x = expr Op x; - > expr binop xrval;
3322   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3323   LValue XLValue = CGF.EmitLValue(X);
3324   RValue ExprRValue = CGF.EmitAnyExpr(E);
3325   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3326                      : llvm::AtomicOrdering::Monotonic;
3327   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3328   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3329   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3330   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3331   auto Gen =
3332       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3333         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3334         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3335         return CGF.EmitAnyExpr(UE);
3336       };
3337   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3338       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3339   // OpenMP, 2.12.6, atomic Construct
3340   // Any atomic construct with a seq_cst clause forces the atomically
3341   // performed operation to include an implicit flush operation without a
3342   // list.
3343   if (IsSeqCst)
3344     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3345 }
3346 
3347 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3348                             QualType SourceType, QualType ResType,
3349                             SourceLocation Loc) {
3350   switch (CGF.getEvaluationKind(ResType)) {
3351   case TEK_Scalar:
3352     return RValue::get(
3353         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3354   case TEK_Complex: {
3355     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3356     return RValue::getComplex(Res.first, Res.second);
3357   }
3358   case TEK_Aggregate:
3359     break;
3360   }
3361   llvm_unreachable("Must be a scalar or complex.");
3362 }
3363 
3364 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3365                                      bool IsPostfixUpdate, const Expr *V,
3366                                      const Expr *X, const Expr *E,
3367                                      const Expr *UE, bool IsXLHSInRHSPart,
3368                                      SourceLocation Loc) {
3369   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3370   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3371   RValue NewVVal;
3372   LValue VLValue = CGF.EmitLValue(V);
3373   LValue XLValue = CGF.EmitLValue(X);
3374   RValue ExprRValue = CGF.EmitAnyExpr(E);
3375   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3376                      : llvm::AtomicOrdering::Monotonic;
3377   QualType NewVValType;
3378   if (UE) {
3379     // 'x' is updated with some additional value.
3380     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3381            "Update expr in 'atomic capture' must be a binary operator.");
3382     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3383     // Update expressions are allowed to have the following forms:
3384     // x binop= expr; -> xrval + expr;
3385     // x++, ++x -> xrval + 1;
3386     // x--, --x -> xrval - 1;
3387     // x = x binop expr; -> xrval binop expr
3388     // x = expr Op x; - > expr binop xrval;
3389     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3390     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3391     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3392     NewVValType = XRValExpr->getType();
3393     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3394     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3395                   IsPostfixUpdate](RValue XRValue) -> RValue {
3396       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3397       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3398       RValue Res = CGF.EmitAnyExpr(UE);
3399       NewVVal = IsPostfixUpdate ? XRValue : Res;
3400       return Res;
3401     };
3402     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3403         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3404     if (Res.first) {
3405       // 'atomicrmw' instruction was generated.
3406       if (IsPostfixUpdate) {
3407         // Use old value from 'atomicrmw'.
3408         NewVVal = Res.second;
3409       } else {
3410         // 'atomicrmw' does not provide new value, so evaluate it using old
3411         // value of 'x'.
3412         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3413         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3414         NewVVal = CGF.EmitAnyExpr(UE);
3415       }
3416     }
3417   } else {
3418     // 'x' is simply rewritten with some 'expr'.
3419     NewVValType = X->getType().getNonReferenceType();
3420     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3421                                X->getType().getNonReferenceType(), Loc);
3422     auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3423       NewVVal = XRValue;
3424       return ExprRValue;
3425     };
3426     // Try to perform atomicrmw xchg, otherwise simple exchange.
3427     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3428         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3429         Loc, Gen);
3430     if (Res.first) {
3431       // 'atomicrmw' instruction was generated.
3432       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3433     }
3434   }
3435   // Emit post-update store to 'v' of old/new 'x' value.
3436   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3437   // OpenMP, 2.12.6, atomic Construct
3438   // Any atomic construct with a seq_cst clause forces the atomically
3439   // performed operation to include an implicit flush operation without a
3440   // list.
3441   if (IsSeqCst)
3442     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3443 }
3444 
3445 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3446                               bool IsSeqCst, bool IsPostfixUpdate,
3447                               const Expr *X, const Expr *V, const Expr *E,
3448                               const Expr *UE, bool IsXLHSInRHSPart,
3449                               SourceLocation Loc) {
3450   switch (Kind) {
3451   case OMPC_read:
3452     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3453     break;
3454   case OMPC_write:
3455     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3456     break;
3457   case OMPC_unknown:
3458   case OMPC_update:
3459     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3460     break;
3461   case OMPC_capture:
3462     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3463                              IsXLHSInRHSPart, Loc);
3464     break;
3465   case OMPC_if:
3466   case OMPC_final:
3467   case OMPC_num_threads:
3468   case OMPC_private:
3469   case OMPC_firstprivate:
3470   case OMPC_lastprivate:
3471   case OMPC_reduction:
3472   case OMPC_task_reduction:
3473   case OMPC_safelen:
3474   case OMPC_simdlen:
3475   case OMPC_collapse:
3476   case OMPC_default:
3477   case OMPC_seq_cst:
3478   case OMPC_shared:
3479   case OMPC_linear:
3480   case OMPC_aligned:
3481   case OMPC_copyin:
3482   case OMPC_copyprivate:
3483   case OMPC_flush:
3484   case OMPC_proc_bind:
3485   case OMPC_schedule:
3486   case OMPC_ordered:
3487   case OMPC_nowait:
3488   case OMPC_untied:
3489   case OMPC_threadprivate:
3490   case OMPC_depend:
3491   case OMPC_mergeable:
3492   case OMPC_device:
3493   case OMPC_threads:
3494   case OMPC_simd:
3495   case OMPC_map:
3496   case OMPC_num_teams:
3497   case OMPC_thread_limit:
3498   case OMPC_priority:
3499   case OMPC_grainsize:
3500   case OMPC_nogroup:
3501   case OMPC_num_tasks:
3502   case OMPC_hint:
3503   case OMPC_dist_schedule:
3504   case OMPC_defaultmap:
3505   case OMPC_uniform:
3506   case OMPC_to:
3507   case OMPC_from:
3508   case OMPC_use_device_ptr:
3509   case OMPC_is_device_ptr:
3510     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3511   }
3512 }
3513 
3514 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3515   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3516   OpenMPClauseKind Kind = OMPC_unknown;
3517   for (auto *C : S.clauses()) {
3518     // Find first clause (skip seq_cst clause, if it is first).
3519     if (C->getClauseKind() != OMPC_seq_cst) {
3520       Kind = C->getClauseKind();
3521       break;
3522     }
3523   }
3524 
3525   const auto *CS =
3526       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3527   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3528     enterFullExpression(EWC);
3529   }
3530   // Processing for statements under 'atomic capture'.
3531   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3532     for (const auto *C : Compound->body()) {
3533       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3534         enterFullExpression(EWC);
3535       }
3536     }
3537   }
3538 
3539   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3540                                             PrePostActionTy &) {
3541     CGF.EmitStopPoint(CS);
3542     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3543                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3544                       S.isXLHSInRHSPart(), S.getLocStart());
3545   };
3546   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3547   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3548 }
3549 
3550 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3551                                          const OMPExecutableDirective &S,
3552                                          const RegionCodeGenTy &CodeGen) {
3553   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3554   CodeGenModule &CGM = CGF.CGM;
3555   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3556 
3557   llvm::Function *Fn = nullptr;
3558   llvm::Constant *FnID = nullptr;
3559 
3560   const Expr *IfCond = nullptr;
3561   // Check for the at most one if clause associated with the target region.
3562   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3563     if (C->getNameModifier() == OMPD_unknown ||
3564         C->getNameModifier() == OMPD_target) {
3565       IfCond = C->getCondition();
3566       break;
3567     }
3568   }
3569 
3570   // Check if we have any device clause associated with the directive.
3571   const Expr *Device = nullptr;
3572   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3573     Device = C->getDevice();
3574   }
3575 
3576   // Check if we have an if clause whose conditional always evaluates to false
3577   // or if we do not have any targets specified. If so the target region is not
3578   // an offload entry point.
3579   bool IsOffloadEntry = true;
3580   if (IfCond) {
3581     bool Val;
3582     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3583       IsOffloadEntry = false;
3584   }
3585   if (CGM.getLangOpts().OMPTargetTriples.empty())
3586     IsOffloadEntry = false;
3587 
3588   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3589   StringRef ParentName;
3590   // In case we have Ctors/Dtors we use the complete type variant to produce
3591   // the mangling of the device outlined kernel.
3592   if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3593     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3594   else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3595     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3596   else
3597     ParentName =
3598         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3599 
3600   // Emit target region as a standalone region.
3601   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3602                                                     IsOffloadEntry, CodeGen);
3603   OMPLexicalScope Scope(CGF, S);
3604   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3605   CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3606   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3607                                         CapturedVars);
3608 }
3609 
3610 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3611                              PrePostActionTy &Action) {
3612   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3613   (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3614   CGF.EmitOMPPrivateClause(S, PrivateScope);
3615   (void)PrivateScope.Privatize();
3616 
3617   Action.Enter(CGF);
3618   CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3619 }
3620 
3621 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3622                                                   StringRef ParentName,
3623                                                   const OMPTargetDirective &S) {
3624   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3625     emitTargetRegion(CGF, S, Action);
3626   };
3627   llvm::Function *Fn;
3628   llvm::Constant *Addr;
3629   // Emit target region as a standalone region.
3630   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3631       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3632   assert(Fn && Addr && "Target device function emission failed.");
3633 }
3634 
3635 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3636   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3637     emitTargetRegion(CGF, S, Action);
3638   };
3639   emitCommonOMPTargetDirective(*this, S, CodeGen);
3640 }
3641 
3642 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3643                                         const OMPExecutableDirective &S,
3644                                         OpenMPDirectiveKind InnermostKind,
3645                                         const RegionCodeGenTy &CodeGen) {
3646   const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3647   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3648       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3649 
3650   const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3651   const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3652   if (NT || TL) {
3653     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3654     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3655 
3656     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3657                                                   S.getLocStart());
3658   }
3659 
3660   OMPTeamsScope Scope(CGF, S);
3661   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3662   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3663   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3664                                            CapturedVars);
3665 }
3666 
3667 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3668   // Emit teams region as a standalone region.
3669   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3670     OMPPrivateScope PrivateScope(CGF);
3671     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3672     CGF.EmitOMPPrivateClause(S, PrivateScope);
3673     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3674     (void)PrivateScope.Privatize();
3675     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3676     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3677   };
3678   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3679   emitPostUpdateForReductionClause(
3680       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3681 }
3682 
3683 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3684                                   const OMPTargetTeamsDirective &S) {
3685   auto *CS = S.getCapturedStmt(OMPD_teams);
3686   Action.Enter(CGF);
3687   auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3688     // TODO: Add support for clauses.
3689     CGF.EmitStmt(CS->getCapturedStmt());
3690   };
3691   emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3692 }
3693 
3694 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3695     CodeGenModule &CGM, StringRef ParentName,
3696     const OMPTargetTeamsDirective &S) {
3697   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3698     emitTargetTeamsRegion(CGF, Action, S);
3699   };
3700   llvm::Function *Fn;
3701   llvm::Constant *Addr;
3702   // Emit target region as a standalone region.
3703   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3704       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3705   assert(Fn && Addr && "Target device function emission failed.");
3706 }
3707 
3708 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3709     const OMPTargetTeamsDirective &S) {
3710   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3711     emitTargetTeamsRegion(CGF, Action, S);
3712   };
3713   emitCommonOMPTargetDirective(*this, S, CodeGen);
3714 }
3715 
3716 void CodeGenFunction::EmitOMPCancellationPointDirective(
3717     const OMPCancellationPointDirective &S) {
3718   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3719                                                    S.getCancelRegion());
3720 }
3721 
3722 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3723   const Expr *IfCond = nullptr;
3724   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3725     if (C->getNameModifier() == OMPD_unknown ||
3726         C->getNameModifier() == OMPD_cancel) {
3727       IfCond = C->getCondition();
3728       break;
3729     }
3730   }
3731   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3732                                         S.getCancelRegion());
3733 }
3734 
3735 CodeGenFunction::JumpDest
3736 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3737   if (Kind == OMPD_parallel || Kind == OMPD_task ||
3738       Kind == OMPD_target_parallel)
3739     return ReturnBlock;
3740   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3741          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3742          Kind == OMPD_distribute_parallel_for ||
3743          Kind == OMPD_target_parallel_for);
3744   return OMPCancelStack.getExitBlock();
3745 }
3746 
3747 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3748     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3749     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3750   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3751   auto OrigVarIt = C.varlist_begin();
3752   auto InitIt = C.inits().begin();
3753   for (auto PvtVarIt : C.private_copies()) {
3754     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3755     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3756     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3757 
3758     // In order to identify the right initializer we need to match the
3759     // declaration used by the mapping logic. In some cases we may get
3760     // OMPCapturedExprDecl that refers to the original declaration.
3761     const ValueDecl *MatchingVD = OrigVD;
3762     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3763       // OMPCapturedExprDecl are used to privative fields of the current
3764       // structure.
3765       auto *ME = cast<MemberExpr>(OED->getInit());
3766       assert(isa<CXXThisExpr>(ME->getBase()) &&
3767              "Base should be the current struct!");
3768       MatchingVD = ME->getMemberDecl();
3769     }
3770 
3771     // If we don't have information about the current list item, move on to
3772     // the next one.
3773     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3774     if (InitAddrIt == CaptureDeviceAddrMap.end())
3775       continue;
3776 
3777     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3778       // Initialize the temporary initialization variable with the address we
3779       // get from the runtime library. We have to cast the source address
3780       // because it is always a void *. References are materialized in the
3781       // privatization scope, so the initialization here disregards the fact
3782       // the original variable is a reference.
3783       QualType AddrQTy =
3784           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3785       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3786       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3787       setAddrOfLocalVar(InitVD, InitAddr);
3788 
3789       // Emit private declaration, it will be initialized by the value we
3790       // declaration we just added to the local declarations map.
3791       EmitDecl(*PvtVD);
3792 
3793       // The initialization variables reached its purpose in the emission
3794       // ofthe previous declaration, so we don't need it anymore.
3795       LocalDeclMap.erase(InitVD);
3796 
3797       // Return the address of the private variable.
3798       return GetAddrOfLocalVar(PvtVD);
3799     });
3800     assert(IsRegistered && "firstprivate var already registered as private");
3801     // Silence the warning about unused variable.
3802     (void)IsRegistered;
3803 
3804     ++OrigVarIt;
3805     ++InitIt;
3806   }
3807 }
3808 
3809 // Generate the instructions for '#pragma omp target data' directive.
3810 void CodeGenFunction::EmitOMPTargetDataDirective(
3811     const OMPTargetDataDirective &S) {
3812   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
3813 
3814   // Create a pre/post action to signal the privatization of the device pointer.
3815   // This action can be replaced by the OpenMP runtime code generation to
3816   // deactivate privatization.
3817   bool PrivatizeDevicePointers = false;
3818   class DevicePointerPrivActionTy : public PrePostActionTy {
3819     bool &PrivatizeDevicePointers;
3820 
3821   public:
3822     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
3823         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
3824     void Enter(CodeGenFunction &CGF) override {
3825       PrivatizeDevicePointers = true;
3826     }
3827   };
3828   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
3829 
3830   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
3831       CodeGenFunction &CGF, PrePostActionTy &Action) {
3832     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3833       CGF.EmitStmt(
3834           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3835     };
3836 
3837     // Codegen that selects wheather to generate the privatization code or not.
3838     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
3839                           &InnermostCodeGen](CodeGenFunction &CGF,
3840                                              PrePostActionTy &Action) {
3841       RegionCodeGenTy RCG(InnermostCodeGen);
3842       PrivatizeDevicePointers = false;
3843 
3844       // Call the pre-action to change the status of PrivatizeDevicePointers if
3845       // needed.
3846       Action.Enter(CGF);
3847 
3848       if (PrivatizeDevicePointers) {
3849         OMPPrivateScope PrivateScope(CGF);
3850         // Emit all instances of the use_device_ptr clause.
3851         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
3852           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
3853                                         Info.CaptureDeviceAddrMap);
3854         (void)PrivateScope.Privatize();
3855         RCG(CGF);
3856       } else
3857         RCG(CGF);
3858     };
3859 
3860     // Forward the provided action to the privatization codegen.
3861     RegionCodeGenTy PrivRCG(PrivCodeGen);
3862     PrivRCG.setAction(Action);
3863 
3864     // Notwithstanding the body of the region is emitted as inlined directive,
3865     // we don't use an inline scope as changes in the references inside the
3866     // region are expected to be visible outside, so we do not privative them.
3867     OMPLexicalScope Scope(CGF, S);
3868     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
3869                                                     PrivRCG);
3870   };
3871 
3872   RegionCodeGenTy RCG(CodeGen);
3873 
3874   // If we don't have target devices, don't bother emitting the data mapping
3875   // code.
3876   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
3877     RCG(*this);
3878     return;
3879   }
3880 
3881   // Check if we have any if clause associated with the directive.
3882   const Expr *IfCond = nullptr;
3883   if (auto *C = S.getSingleClause<OMPIfClause>())
3884     IfCond = C->getCondition();
3885 
3886   // Check if we have any device clause associated with the directive.
3887   const Expr *Device = nullptr;
3888   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3889     Device = C->getDevice();
3890 
3891   // Set the action to signal privatization of device pointers.
3892   RCG.setAction(PrivAction);
3893 
3894   // Emit region code.
3895   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
3896                                              Info);
3897 }
3898 
3899 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
3900     const OMPTargetEnterDataDirective &S) {
3901   // If we don't have target devices, don't bother emitting the data mapping
3902   // code.
3903   if (CGM.getLangOpts().OMPTargetTriples.empty())
3904     return;
3905 
3906   // Check if we have any if clause associated with the directive.
3907   const Expr *IfCond = nullptr;
3908   if (auto *C = S.getSingleClause<OMPIfClause>())
3909     IfCond = C->getCondition();
3910 
3911   // Check if we have any device clause associated with the directive.
3912   const Expr *Device = nullptr;
3913   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3914     Device = C->getDevice();
3915 
3916   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3917 }
3918 
3919 void CodeGenFunction::EmitOMPTargetExitDataDirective(
3920     const OMPTargetExitDataDirective &S) {
3921   // If we don't have target devices, don't bother emitting the data mapping
3922   // code.
3923   if (CGM.getLangOpts().OMPTargetTriples.empty())
3924     return;
3925 
3926   // Check if we have any if clause associated with the directive.
3927   const Expr *IfCond = nullptr;
3928   if (auto *C = S.getSingleClause<OMPIfClause>())
3929     IfCond = C->getCondition();
3930 
3931   // Check if we have any device clause associated with the directive.
3932   const Expr *Device = nullptr;
3933   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3934     Device = C->getDevice();
3935 
3936   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3937 }
3938 
3939 static void emitTargetParallelRegion(CodeGenFunction &CGF,
3940                                      const OMPTargetParallelDirective &S,
3941                                      PrePostActionTy &Action) {
3942   // Get the captured statement associated with the 'parallel' region.
3943   auto *CS = S.getCapturedStmt(OMPD_parallel);
3944   Action.Enter(CGF);
3945   auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
3946     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3947     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3948     CGF.EmitOMPPrivateClause(S, PrivateScope);
3949     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3950     (void)PrivateScope.Privatize();
3951     // TODO: Add support for clauses.
3952     CGF.EmitStmt(CS->getCapturedStmt());
3953     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3954   };
3955   emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
3956                                  emitEmptyBoundParameters);
3957   emitPostUpdateForReductionClause(
3958       CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
3959 }
3960 
3961 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
3962     CodeGenModule &CGM, StringRef ParentName,
3963     const OMPTargetParallelDirective &S) {
3964   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3965     emitTargetParallelRegion(CGF, S, Action);
3966   };
3967   llvm::Function *Fn;
3968   llvm::Constant *Addr;
3969   // Emit target region as a standalone region.
3970   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3971       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3972   assert(Fn && Addr && "Target device function emission failed.");
3973 }
3974 
3975 void CodeGenFunction::EmitOMPTargetParallelDirective(
3976     const OMPTargetParallelDirective &S) {
3977   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3978     emitTargetParallelRegion(CGF, S, Action);
3979   };
3980   emitCommonOMPTargetDirective(*this, S, CodeGen);
3981 }
3982 
/// Emit code for '#pragma omp target parallel for'.
///
/// Code generation for this directive is not implemented yet: the body is
/// empty, so nothing is emitted for \a S.
void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}
3987 
3988 /// Emit a helper variable and return corresponding lvalue.
3989 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
3990                      const ImplicitParamDecl *PVD,
3991                      CodeGenFunction::OMPPrivateScope &Privates) {
3992   auto *VDecl = cast<VarDecl>(Helper->getDecl());
3993   Privates.addPrivate(
3994       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
3995 }
3996 
3997 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
3998   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
3999   // Emit outlined function for task construct.
4000   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
4001   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4002   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4003   const Expr *IfCond = nullptr;
4004   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4005     if (C->getNameModifier() == OMPD_unknown ||
4006         C->getNameModifier() == OMPD_taskloop) {
4007       IfCond = C->getCondition();
4008       break;
4009     }
4010   }
4011 
4012   OMPTaskDataTy Data;
4013   // Check if taskloop must be emitted without taskgroup.
4014   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4015   // TODO: Check if we should emit tied or untied task.
4016   Data.Tied = true;
4017   // Set scheduling for taskloop
4018   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4019     // grainsize clause
4020     Data.Schedule.setInt(/*IntVal=*/false);
4021     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4022   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4023     // num_tasks clause
4024     Data.Schedule.setInt(/*IntVal=*/true);
4025     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4026   }
4027 
4028   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4029     // if (PreCond) {
4030     //   for (IV in 0..LastIteration) BODY;
4031     //   <Final counter/linear vars updates>;
4032     // }
4033     //
4034 
4035     // Emit: if (PreCond) - begin.
4036     // If the condition constant folds and can be elided, avoid emitting the
4037     // whole loop.
4038     bool CondConstant;
4039     llvm::BasicBlock *ContBlock = nullptr;
4040     OMPLoopScope PreInitScope(CGF, S);
4041     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4042       if (!CondConstant)
4043         return;
4044     } else {
4045       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4046       ContBlock = CGF.createBasicBlock("taskloop.if.end");
4047       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4048                   CGF.getProfileCount(&S));
4049       CGF.EmitBlock(ThenBlock);
4050       CGF.incrementProfileCounter(&S);
4051     }
4052 
4053     if (isOpenMPSimdDirective(S.getDirectiveKind()))
4054       CGF.EmitOMPSimdInit(S);
4055 
4056     OMPPrivateScope LoopScope(CGF);
4057     // Emit helper vars inits.
4058     enum { LowerBound = 5, UpperBound, Stride, LastIter };
4059     auto *I = CS->getCapturedDecl()->param_begin();
4060     auto *LBP = std::next(I, LowerBound);
4061     auto *UBP = std::next(I, UpperBound);
4062     auto *STP = std::next(I, Stride);
4063     auto *LIP = std::next(I, LastIter);
4064     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4065              LoopScope);
4066     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4067              LoopScope);
4068     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4069     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4070              LoopScope);
4071     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4072     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4073     (void)LoopScope.Privatize();
4074     // Emit the loop iteration variable.
4075     const Expr *IVExpr = S.getIterationVariable();
4076     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4077     CGF.EmitVarDecl(*IVDecl);
4078     CGF.EmitIgnoredExpr(S.getInit());
4079 
4080     // Emit the iterations count variable.
4081     // If it is not a variable, Sema decided to calculate iterations count on
4082     // each iteration (e.g., it is foldable into a constant).
4083     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4084       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4085       // Emit calculation of the iterations count.
4086       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4087     }
4088 
4089     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4090                          S.getInc(),
4091                          [&S](CodeGenFunction &CGF) {
4092                            CGF.EmitOMPLoopBody(S, JumpDest());
4093                            CGF.EmitStopPoint(&S);
4094                          },
4095                          [](CodeGenFunction &) {});
4096     // Emit: if (PreCond) - end.
4097     if (ContBlock) {
4098       CGF.EmitBranch(ContBlock);
4099       CGF.EmitBlock(ContBlock, true);
4100     }
4101     // Emit final copy of the lastprivate variables if IsLastIter != 0.
4102     if (HasLastprivateClause) {
4103       CGF.EmitOMPLastprivateClauseFinal(
4104           S, isOpenMPSimdDirective(S.getDirectiveKind()),
4105           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4106               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4107               (*LIP)->getType(), S.getLocStart())));
4108     }
4109   };
4110   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4111                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4112                             const OMPTaskDataTy &Data) {
4113     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4114       OMPLoopScope PreInitScope(CGF, S);
4115       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4116                                                   OutlinedFn, SharedsTy,
4117                                                   CapturedStruct, IfCond, Data);
4118     };
4119     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4120                                                     CodeGen);
4121   };
4122   if (Data.Nogroup)
4123     EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4124   else {
4125     CGM.getOpenMPRuntime().emitTaskgroupRegion(
4126         *this,
4127         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
4128                                         PrePostActionTy &Action) {
4129           Action.Enter(CGF);
4130           CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4131         },
4132         S.getLocStart());
4133   }
4134 }
4135 
4136 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4137   EmitOMPTaskLoopBasedDirective(S);
4138 }
4139 
4140 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4141     const OMPTaskLoopSimdDirective &S) {
4142   EmitOMPTaskLoopBasedDirective(S);
4143 }
4144 
4145 // Generate the instructions for '#pragma omp target update' directive.
4146 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4147     const OMPTargetUpdateDirective &S) {
4148   // If we don't have target devices, don't bother emitting the data mapping
4149   // code.
4150   if (CGM.getLangOpts().OMPTargetTriples.empty())
4151     return;
4152 
4153   // Check if we have any if clause associated with the directive.
4154   const Expr *IfCond = nullptr;
4155   if (auto *C = S.getSingleClause<OMPIfClause>())
4156     IfCond = C->getCondition();
4157 
4158   // Check if we have any device clause associated with the directive.
4159   const Expr *Device = nullptr;
4160   if (auto *C = S.getSingleClause<OMPDeviceClause>())
4161     Device = C->getDevice();
4162 
4163   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4164 }
4165