1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     emitPreInitStmt(CGF, S);
61     if (AsInlined) {
62       if (S.hasAssociatedStmt()) {
63         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
64         for (auto &C : CS->captures()) {
65           if (C.capturesVariable() || C.capturesVariableByCopy()) {
66             auto *VD = C.getCapturedVar();
67             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
68                             isCapturedVar(CGF, VD) ||
69                                 (CGF.CapturedStmtInfo &&
70                                  InlinedShareds.isGlobalVarCaptured(VD)),
71                             VD->getType().getNonReferenceType(), VK_LValue,
72                             SourceLocation());
73             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
74               return CGF.EmitLValue(&DRE).getAddress();
75             });
76           }
77         }
78         (void)InlinedShareds.Privatize();
79       }
80     }
81   }
82 };
83 
84 /// Private scope for OpenMP loop-based directives, that supports capturing
85 /// of used expression from loop statement.
86 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
87   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
88     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
89       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
90         for (const auto *I : PreInits->decls())
91           CGF.EmitVarDecl(cast<VarDecl>(*I));
92       }
93     }
94   }
95 
96 public:
97   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
98       : CodeGenFunction::RunCleanupsScope(CGF) {
99     emitPreInitStmt(CGF, S);
100   }
101 };
102 
103 } // namespace
104 
105 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
106   auto &C = getContext();
107   llvm::Value *Size = nullptr;
108   auto SizeInChars = C.getTypeSizeInChars(Ty);
109   if (SizeInChars.isZero()) {
110     // getTypeSizeInChars() returns 0 for a VLA.
111     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
112       llvm::Value *ArraySize;
113       std::tie(ArraySize, Ty) = getVLASize(VAT);
114       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
115     }
116     SizeInChars = C.getTypeSizeInChars(Ty);
117     if (SizeInChars.isZero())
118       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
119     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
120   } else
121     Size = CGM.getSize(SizeInChars);
122   return Size;
123 }
124 
125 void CodeGenFunction::GenerateOpenMPCapturedVars(
126     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
127   const RecordDecl *RD = S.getCapturedRecordDecl();
128   auto CurField = RD->field_begin();
129   auto CurCap = S.captures().begin();
130   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
131                                                  E = S.capture_init_end();
132        I != E; ++I, ++CurField, ++CurCap) {
133     if (CurField->hasCapturedVLAType()) {
134       auto VAT = CurField->getCapturedVLAType();
135       auto *Val = VLASizeMap[VAT->getSizeExpr()];
136       CapturedVars.push_back(Val);
137     } else if (CurCap->capturesThis())
138       CapturedVars.push_back(CXXThisValue);
139     else if (CurCap->capturesVariableByCopy())
140       CapturedVars.push_back(
141           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
142     else {
143       assert(CurCap->capturesVariable() && "Expected capture by reference.");
144       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
145     }
146   }
147 }
148 
149 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
150                                     StringRef Name, LValue AddrLV,
151                                     bool isReferenceType = false) {
152   ASTContext &Ctx = CGF.getContext();
153 
154   auto *CastedPtr = CGF.EmitScalarConversion(
155       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
156       Ctx.getPointerType(DstType), SourceLocation());
157   auto TmpAddr =
158       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
159           .getAddress();
160 
161   // If we are dealing with references we need to return the address of the
162   // reference instead of the reference of the value.
163   if (isReferenceType) {
164     QualType RefType = Ctx.getLValueReferenceType(DstType);
165     auto *RefVal = TmpAddr.getPointer();
166     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
167     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
168     CGF.EmitScalarInit(RefVal, TmpLVal);
169   }
170 
171   return TmpAddr;
172 }
173 
174 llvm::Function *
175 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
176                                                     bool CastValToPtr) {
177   assert(
178       CapturedStmtInfo &&
179       "CapturedStmtInfo should be set when generating the captured function");
180   const CapturedDecl *CD = S.getCapturedDecl();
181   const RecordDecl *RD = S.getCapturedRecordDecl();
182   assert(CD->hasBody() && "missing CapturedDecl body");
183 
184   // Build the argument list.
185   ASTContext &Ctx = CGM.getContext();
186   FunctionArgList Args;
187   Args.append(CD->param_begin(),
188               std::next(CD->param_begin(), CD->getContextParamPosition()));
189   auto I = S.captures().begin();
190   for (auto *FD : RD->fields()) {
191     QualType ArgType = FD->getType();
192     IdentifierInfo *II = nullptr;
193     VarDecl *CapVar = nullptr;
194 
195     // If this is a capture by copy and the type is not a pointer, the outlined
196     // function argument type should be uintptr and the value properly casted to
197     // uintptr. This is necessary given that the runtime library is only able to
198     // deal with pointers. We can pass in the same way the VLA type sizes to the
199     // outlined function.
200     if (CastValToPtr) {
201       if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
202           I->capturesVariableArrayType())
203         ArgType = Ctx.getUIntPtrType();
204     }
205 
206     if (I->capturesVariable() || I->capturesVariableByCopy()) {
207       CapVar = I->getCapturedVar();
208       II = CapVar->getIdentifier();
209     } else if (I->capturesThis())
210       II = &getContext().Idents.get("this");
211     else {
212       assert(I->capturesVariableArrayType());
213       II = &getContext().Idents.get("vla");
214     }
215     if (ArgType->isVariablyModifiedType())
216       ArgType = getContext().getVariableArrayDecayedType(ArgType);
217     Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
218                                              FD->getLocation(), II, ArgType));
219     ++I;
220   }
221   Args.append(
222       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
223       CD->param_end());
224 
225   // Create the function declaration.
226   FunctionType::ExtInfo ExtInfo;
227   const CGFunctionInfo &FuncInfo =
228       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
229   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
230 
231   llvm::Function *F = llvm::Function::Create(
232       FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
233       CapturedStmtInfo->getHelperName(), &CGM.getModule());
234   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
235   if (CD->isNothrow())
236     F->addFnAttr(llvm::Attribute::NoUnwind);
237 
238   // Generate the function.
239   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
240                 CD->getBody()->getLocStart());
241   unsigned Cnt = CD->getContextParamPosition();
242   I = S.captures().begin();
243   for (auto *FD : RD->fields()) {
244     // If we are capturing a pointer by copy we don't need to do anything, just
245     // use the value that we get from the arguments.
246     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
247       setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
248       ++Cnt;
249       ++I;
250       continue;
251     }
252 
253     LValue ArgLVal =
254         MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
255                        AlignmentSource::Decl);
256     if (FD->hasCapturedVLAType()) {
257       LValue CastedArgLVal =
258           CastValToPtr
259               ? MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
260                                                     Args[Cnt]->getName(),
261                                                     ArgLVal),
262                                FD->getType(), AlignmentSource::Decl)
263               : ArgLVal;
264       auto *ExprArg =
265           EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
266       auto VAT = FD->getCapturedVLAType();
267       VLASizeMap[VAT->getSizeExpr()] = ExprArg;
268     } else if (I->capturesVariable()) {
269       auto *Var = I->getCapturedVar();
270       QualType VarTy = Var->getType();
271       Address ArgAddr = ArgLVal.getAddress();
272       if (!VarTy->isReferenceType()) {
273         ArgAddr = EmitLoadOfReference(
274             ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
275       }
276       setAddrOfLocalVar(
277           Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
278     } else if (I->capturesVariableByCopy()) {
279       assert(!FD->getType()->isAnyPointerType() &&
280              "Not expecting a captured pointer.");
281       auto *Var = I->getCapturedVar();
282       QualType VarTy = Var->getType();
283       if (!CastValToPtr && VarTy->isReferenceType()) {
284         Address Temp = CreateMemTemp(VarTy);
285         Builder.CreateStore(ArgLVal.getPointer(), Temp);
286         ArgLVal = MakeAddrLValue(Temp, VarTy);
287       }
288       setAddrOfLocalVar(Var, CastValToPtr ? castValueFromUintptr(
289                                                 *this, FD->getType(),
290                                                 Args[Cnt]->getName(), ArgLVal,
291                                                 VarTy->isReferenceType())
292                                           : ArgLVal.getAddress());
293     } else {
294       // If 'this' is captured, load it into CXXThisValue.
295       assert(I->capturesThis());
296       CXXThisValue =
297           EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
298     }
299     ++Cnt;
300     ++I;
301   }
302 
303   PGO.assignRegionCounters(GlobalDecl(CD), F);
304   CapturedStmtInfo->EmitBody(*this, CD->getBody());
305   FinishFunction(CD->getBodyRBrace());
306 
307   return F;
308 }
309 
310 //===----------------------------------------------------------------------===//
311 //                              OpenMP Directive Emission
312 //===----------------------------------------------------------------------===//
313 void CodeGenFunction::EmitOMPAggregateAssign(
314     Address DestAddr, Address SrcAddr, QualType OriginalType,
315     const llvm::function_ref<void(Address, Address)> &CopyGen) {
316   // Perform element-by-element initialization.
317   QualType ElementTy;
318 
319   // Drill down to the base element type on both arrays.
320   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
321   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
322   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
323 
324   auto SrcBegin = SrcAddr.getPointer();
325   auto DestBegin = DestAddr.getPointer();
326   // Cast from pointer to array type to pointer to single element.
327   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
328   // The basic structure here is a while-do loop.
329   auto BodyBB = createBasicBlock("omp.arraycpy.body");
330   auto DoneBB = createBasicBlock("omp.arraycpy.done");
331   auto IsEmpty =
332       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
333   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
334 
335   // Enter the loop body, making that address the current address.
336   auto EntryBB = Builder.GetInsertBlock();
337   EmitBlock(BodyBB);
338 
339   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
340 
341   llvm::PHINode *SrcElementPHI =
342     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
343   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
344   Address SrcElementCurrent =
345       Address(SrcElementPHI,
346               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
347 
348   llvm::PHINode *DestElementPHI =
349     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
350   DestElementPHI->addIncoming(DestBegin, EntryBB);
351   Address DestElementCurrent =
352     Address(DestElementPHI,
353             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
354 
355   // Emit copy.
356   CopyGen(DestElementCurrent, SrcElementCurrent);
357 
358   // Shift the address forward by one element.
359   auto DestElementNext = Builder.CreateConstGEP1_32(
360       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
361   auto SrcElementNext = Builder.CreateConstGEP1_32(
362       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
363   // Check whether we've reached the end.
364   auto Done =
365       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
366   Builder.CreateCondBr(Done, DoneBB, BodyBB);
367   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
368   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
369 
370   // Done.
371   EmitBlock(DoneBB, /*IsFinished=*/true);
372 }
373 
374 /// Check if the combiner is a call to UDR combiner and if it is so return the
375 /// UDR decl used for reduction.
376 static const OMPDeclareReductionDecl *
377 getReductionInit(const Expr *ReductionOp) {
378   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
379     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
380       if (auto *DRE =
381               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
382         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
383           return DRD;
384   return nullptr;
385 }
386 
387 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
388                                              const OMPDeclareReductionDecl *DRD,
389                                              const Expr *InitOp,
390                                              Address Private, Address Original,
391                                              QualType Ty) {
392   if (DRD->getInitializer()) {
393     std::pair<llvm::Function *, llvm::Function *> Reduction =
394         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
395     auto *CE = cast<CallExpr>(InitOp);
396     auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
397     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
398     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
399     auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
400     auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
401     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
402     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
403                             [=]() -> Address { return Private; });
404     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
405                             [=]() -> Address { return Original; });
406     (void)PrivateScope.Privatize();
407     RValue Func = RValue::get(Reduction.second);
408     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
409     CGF.EmitIgnoredExpr(InitOp);
410   } else {
411     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
412     auto *GV = new llvm::GlobalVariable(
413         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
414         llvm::GlobalValue::PrivateLinkage, Init, ".init");
415     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
416     RValue InitRVal;
417     switch (CGF.getEvaluationKind(Ty)) {
418     case TEK_Scalar:
419       InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
420       break;
421     case TEK_Complex:
422       InitRVal =
423           RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
424       break;
425     case TEK_Aggregate:
426       InitRVal = RValue::getAggregate(LV.getAddress());
427       break;
428     }
429     OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
430     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
431     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
432                          /*IsInitializer=*/false);
433   }
434 }
435 
436 /// \brief Emit initialization of arrays of complex types.
437 /// \param DestAddr Address of the array.
438 /// \param Type Type of array.
439 /// \param Init Initial expression of array.
440 /// \param SrcAddr Address of the original array.
441 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
442                                  QualType Type, const Expr *Init,
443                                  Address SrcAddr = Address::invalid()) {
444   auto *DRD = getReductionInit(Init);
445   // Perform element-by-element initialization.
446   QualType ElementTy;
447 
448   // Drill down to the base element type on both arrays.
449   auto ArrayTy = Type->getAsArrayTypeUnsafe();
450   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
451   DestAddr =
452       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
453   if (DRD)
454     SrcAddr =
455         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
456 
457   llvm::Value *SrcBegin = nullptr;
458   if (DRD)
459     SrcBegin = SrcAddr.getPointer();
460   auto DestBegin = DestAddr.getPointer();
461   // Cast from pointer to array type to pointer to single element.
462   auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
463   // The basic structure here is a while-do loop.
464   auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
465   auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
466   auto IsEmpty =
467       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
468   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
469 
470   // Enter the loop body, making that address the current address.
471   auto EntryBB = CGF.Builder.GetInsertBlock();
472   CGF.EmitBlock(BodyBB);
473 
474   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
475 
476   llvm::PHINode *SrcElementPHI = nullptr;
477   Address SrcElementCurrent = Address::invalid();
478   if (DRD) {
479     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
480                                           "omp.arraycpy.srcElementPast");
481     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
482     SrcElementCurrent =
483         Address(SrcElementPHI,
484                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
485   }
486   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
487       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
488   DestElementPHI->addIncoming(DestBegin, EntryBB);
489   Address DestElementCurrent =
490       Address(DestElementPHI,
491               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
492 
493   // Emit copy.
494   {
495     CodeGenFunction::RunCleanupsScope InitScope(CGF);
496     if (DRD && (DRD->getInitializer() || !Init)) {
497       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
498                                        SrcElementCurrent, ElementTy);
499     } else
500       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
501                            /*IsInitializer=*/false);
502   }
503 
504   if (DRD) {
505     // Shift the address forward by one element.
506     auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
507         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
508     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
509   }
510 
511   // Shift the address forward by one element.
512   auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
513       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
514   // Check whether we've reached the end.
515   auto Done =
516       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
517   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
518   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
519 
520   // Done.
521   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
522 }
523 
524 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
525                                   Address SrcAddr, const VarDecl *DestVD,
526                                   const VarDecl *SrcVD, const Expr *Copy) {
527   if (OriginalType->isArrayType()) {
528     auto *BO = dyn_cast<BinaryOperator>(Copy);
529     if (BO && BO->getOpcode() == BO_Assign) {
530       // Perform simple memcpy for simple copying.
531       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
532     } else {
533       // For arrays with complex element types perform element by element
534       // copying.
535       EmitOMPAggregateAssign(
536           DestAddr, SrcAddr, OriginalType,
537           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
538             // Working with the single array element, so have to remap
539             // destination and source variables to corresponding array
540             // elements.
541             CodeGenFunction::OMPPrivateScope Remap(*this);
542             Remap.addPrivate(DestVD, [DestElement]() -> Address {
543               return DestElement;
544             });
545             Remap.addPrivate(
546                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
547             (void)Remap.Privatize();
548             EmitIgnoredExpr(Copy);
549           });
550     }
551   } else {
552     // Remap pseudo source variable to private copy.
553     CodeGenFunction::OMPPrivateScope Remap(*this);
554     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
555     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
556     (void)Remap.Privatize();
557     // Emit copying of the whole variable.
558     EmitIgnoredExpr(Copy);
559   }
560 }
561 
562 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
563                                                 OMPPrivateScope &PrivateScope) {
564   if (!HaveInsertPoint())
565     return false;
566   bool FirstprivateIsLastprivate = false;
567   llvm::DenseSet<const VarDecl *> Lastprivates;
568   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
569     for (const auto *D : C->varlists())
570       Lastprivates.insert(
571           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
572   }
573   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
574   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
575     auto IRef = C->varlist_begin();
576     auto InitsRef = C->inits().begin();
577     for (auto IInit : C->private_copies()) {
578       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
579       bool ThisFirstprivateIsLastprivate =
580           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
581       auto *FD = CapturedStmtInfo->lookup(OrigVD);
582       if (!ThisFirstprivateIsLastprivate && FD &&
583           !FD->getType()->isReferenceType()) {
584         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
585         ++IRef;
586         ++InitsRef;
587         continue;
588       }
589       FirstprivateIsLastprivate =
590           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
591       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
592         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
593         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
594         bool IsRegistered;
595         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
596                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
597                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
598         Address OriginalAddr = EmitLValue(&DRE).getAddress();
599         QualType Type = VD->getType();
600         if (Type->isArrayType()) {
601           // Emit VarDecl with copy init for arrays.
602           // Get the address of the original variable captured in current
603           // captured region.
604           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
605             auto Emission = EmitAutoVarAlloca(*VD);
606             auto *Init = VD->getInit();
607             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
608               // Perform simple memcpy.
609               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
610                                   Type);
611             } else {
612               EmitOMPAggregateAssign(
613                   Emission.getAllocatedAddress(), OriginalAddr, Type,
614                   [this, VDInit, Init](Address DestElement,
615                                        Address SrcElement) {
616                     // Clean up any temporaries needed by the initialization.
617                     RunCleanupsScope InitScope(*this);
618                     // Emit initialization for single element.
619                     setAddrOfLocalVar(VDInit, SrcElement);
620                     EmitAnyExprToMem(Init, DestElement,
621                                      Init->getType().getQualifiers(),
622                                      /*IsInitializer*/ false);
623                     LocalDeclMap.erase(VDInit);
624                   });
625             }
626             EmitAutoVarCleanups(Emission);
627             return Emission.getAllocatedAddress();
628           });
629         } else {
630           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
631             // Emit private VarDecl with copy init.
632             // Remap temp VDInit variable to the address of the original
633             // variable
634             // (for proper handling of captured global variables).
635             setAddrOfLocalVar(VDInit, OriginalAddr);
636             EmitDecl(*VD);
637             LocalDeclMap.erase(VDInit);
638             return GetAddrOfLocalVar(VD);
639           });
640         }
641         assert(IsRegistered &&
642                "firstprivate var already registered as private");
643         // Silence the warning about unused variable.
644         (void)IsRegistered;
645       }
646       ++IRef;
647       ++InitsRef;
648     }
649   }
650   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
651 }
652 
653 void CodeGenFunction::EmitOMPPrivateClause(
654     const OMPExecutableDirective &D,
655     CodeGenFunction::OMPPrivateScope &PrivateScope) {
656   if (!HaveInsertPoint())
657     return;
658   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
659   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
660     auto IRef = C->varlist_begin();
661     for (auto IInit : C->private_copies()) {
662       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
663       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
664         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
665         bool IsRegistered =
666             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
667               // Emit private VarDecl with copy init.
668               EmitDecl(*VD);
669               return GetAddrOfLocalVar(VD);
670             });
671         assert(IsRegistered && "private var already registered as private");
672         // Silence the warning about unused variable.
673         (void)IsRegistered;
674       }
675       ++IRef;
676     }
677   }
678 }
679 
680 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
681   if (!HaveInsertPoint())
682     return false;
683   // threadprivate_var1 = master_threadprivate_var1;
684   // operator=(threadprivate_var2, master_threadprivate_var2);
685   // ...
686   // __kmpc_barrier(&loc, global_tid);
687   llvm::DenseSet<const VarDecl *> CopiedVars;
688   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
689   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
690     auto IRef = C->varlist_begin();
691     auto ISrcRef = C->source_exprs().begin();
692     auto IDestRef = C->destination_exprs().begin();
693     for (auto *AssignOp : C->assignment_ops()) {
694       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
695       QualType Type = VD->getType();
696       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
697         // Get the address of the master variable. If we are emitting code with
698         // TLS support, the address is passed from the master as field in the
699         // captured declaration.
700         Address MasterAddr = Address::invalid();
701         if (getLangOpts().OpenMPUseTLS &&
702             getContext().getTargetInfo().isTLSSupported()) {
703           assert(CapturedStmtInfo->lookup(VD) &&
704                  "Copyin threadprivates should have been captured!");
705           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
706                           VK_LValue, (*IRef)->getExprLoc());
707           MasterAddr = EmitLValue(&DRE).getAddress();
708           LocalDeclMap.erase(VD);
709         } else {
710           MasterAddr =
711             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
712                                         : CGM.GetAddrOfGlobal(VD),
713                     getContext().getDeclAlign(VD));
714         }
715         // Get the address of the threadprivate variable.
716         Address PrivateAddr = EmitLValue(*IRef).getAddress();
717         if (CopiedVars.size() == 1) {
718           // At first check if current thread is a master thread. If it is, no
719           // need to copy data.
720           CopyBegin = createBasicBlock("copyin.not.master");
721           CopyEnd = createBasicBlock("copyin.not.master.end");
722           Builder.CreateCondBr(
723               Builder.CreateICmpNE(
724                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
725                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
726               CopyBegin, CopyEnd);
727           EmitBlock(CopyBegin);
728         }
729         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
730         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
731         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
732       }
733       ++IRef;
734       ++ISrcRef;
735       ++IDestRef;
736     }
737   }
738   if (CopyEnd) {
739     // Exit out of copying procedure for non-master thread.
740     EmitBlock(CopyEnd, /*IsFinished=*/true);
741     return true;
742   }
743   return false;
744 }
745 
746 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
747     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
748   if (!HaveInsertPoint())
749     return false;
750   bool HasAtLeastOneLastprivate = false;
751   llvm::DenseSet<const VarDecl *> SIMDLCVs;
752   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
753     auto *LoopDirective = cast<OMPLoopDirective>(&D);
754     for (auto *C : LoopDirective->counters()) {
755       SIMDLCVs.insert(
756           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
757     }
758   }
759   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
760   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
761     HasAtLeastOneLastprivate = true;
762     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
763       break;
764     auto IRef = C->varlist_begin();
765     auto IDestRef = C->destination_exprs().begin();
766     for (auto *IInit : C->private_copies()) {
767       // Keep the address of the original variable for future update at the end
768       // of the loop.
769       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
770       // Taskloops do not require additional initialization, it is done in
771       // runtime support library.
772       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
773         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
774         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
775           DeclRefExpr DRE(
776               const_cast<VarDecl *>(OrigVD),
777               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
778                   OrigVD) != nullptr,
779               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
780           return EmitLValue(&DRE).getAddress();
781         });
782         // Check if the variable is also a firstprivate: in this case IInit is
783         // not generated. Initialization of this variable will happen in codegen
784         // for 'firstprivate' clause.
785         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
786           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
787           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
788             // Emit private VarDecl with copy init.
789             EmitDecl(*VD);
790             return GetAddrOfLocalVar(VD);
791           });
792           assert(IsRegistered &&
793                  "lastprivate var already registered as private");
794           (void)IsRegistered;
795         }
796       }
797       ++IRef;
798       ++IDestRef;
799     }
800   }
801   return HasAtLeastOneLastprivate;
802 }
803 
804 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
805     const OMPExecutableDirective &D, bool NoFinals,
806     llvm::Value *IsLastIterCond) {
807   if (!HaveInsertPoint())
808     return;
809   // Emit following code:
810   // if (<IsLastIterCond>) {
811   //   orig_var1 = private_orig_var1;
812   //   ...
813   //   orig_varn = private_orig_varn;
814   // }
815   llvm::BasicBlock *ThenBB = nullptr;
816   llvm::BasicBlock *DoneBB = nullptr;
817   if (IsLastIterCond) {
818     ThenBB = createBasicBlock(".omp.lastprivate.then");
819     DoneBB = createBasicBlock(".omp.lastprivate.done");
820     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
821     EmitBlock(ThenBB);
822   }
823   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
824   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
825   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
826     auto IC = LoopDirective->counters().begin();
827     for (auto F : LoopDirective->finals()) {
828       auto *D =
829           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
830       if (NoFinals)
831         AlreadyEmittedVars.insert(D);
832       else
833         LoopCountersAndUpdates[D] = F;
834       ++IC;
835     }
836   }
837   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
838     auto IRef = C->varlist_begin();
839     auto ISrcRef = C->source_exprs().begin();
840     auto IDestRef = C->destination_exprs().begin();
841     for (auto *AssignOp : C->assignment_ops()) {
842       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
843       QualType Type = PrivateVD->getType();
844       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
845       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
846         // If lastprivate variable is a loop control variable for loop-based
847         // directive, update its value before copyin back to original
848         // variable.
849         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
850           EmitIgnoredExpr(FinalExpr);
851         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
852         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
853         // Get the address of the original variable.
854         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
855         // Get the address of the private variable.
856         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
857         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
858           PrivateAddr =
859               Address(Builder.CreateLoad(PrivateAddr),
860                       getNaturalTypeAlignment(RefTy->getPointeeType()));
861         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
862       }
863       ++IRef;
864       ++ISrcRef;
865       ++IDestRef;
866     }
867     if (auto *PostUpdate = C->getPostUpdateExpr())
868       EmitIgnoredExpr(PostUpdate);
869   }
870   if (IsLastIterCond)
871     EmitBlock(DoneBB, /*IsFinished=*/true);
872 }
873 
874 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
875                           LValue BaseLV, llvm::Value *Addr) {
876   Address Tmp = Address::invalid();
877   Address TopTmp = Address::invalid();
878   Address MostTopTmp = Address::invalid();
879   BaseTy = BaseTy.getNonReferenceType();
880   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
881          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
882     Tmp = CGF.CreateMemTemp(BaseTy);
883     if (TopTmp.isValid())
884       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
885     else
886       MostTopTmp = Tmp;
887     TopTmp = Tmp;
888     BaseTy = BaseTy->getPointeeType();
889   }
890   llvm::Type *Ty = BaseLV.getPointer()->getType();
891   if (Tmp.isValid())
892     Ty = Tmp.getElementType();
893   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
894   if (Tmp.isValid()) {
895     CGF.Builder.CreateStore(Addr, Tmp);
896     return MostTopTmp;
897   }
898   return Address(Addr, BaseLV.getAlignment());
899 }
900 
901 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
902                           LValue BaseLV) {
903   BaseTy = BaseTy.getNonReferenceType();
904   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
905          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
906     if (auto *PtrTy = BaseTy->getAs<PointerType>())
907       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
908     else {
909       BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
910                                              BaseTy->castAs<ReferenceType>());
911     }
912     BaseTy = BaseTy->getPointeeType();
913   }
914   return CGF.MakeAddrLValue(
915       Address(
916           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
917               BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
918           BaseLV.getAlignment()),
919       BaseLV.getType(), BaseLV.getAlignmentSource());
920 }
921 
/// Emit the private copies used by the 'reduction' clauses of \a D and
/// register them in \a PrivateScope.
///
/// For every reduction item three mappings are added to \a PrivateScope:
///  - the LHS helper variable is mapped to the address of the original item;
///  - the original variable is mapped to a newly allocated private copy. For
///    array sections and array subscripts the registered address is the
///    private copy shifted by the distance between the item and the beginning
///    of its base, wrapped by castToBase();
///  - the RHS helper variable is mapped to the private copy.
///
/// The private copy is initialized either through the reduction operation
/// expression (when getReductionInit() yields a declaration) or through the
/// initializer of the private variable.
///
/// Does nothing when there is no insertion point.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // The helper expression lists are walked in lock-step with the list of
    // reduction items.
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      // NOTE(review): presumably the user-defined 'declare reduction'
      // declaration for this item, or null - confirm against
      // getReductionInit().
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        // Case 1: the item is an array section. Strip nested sections and
        // subscripts to find the underlying variable.
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        // Lvalues of the first and the last element of the section.
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Number of elements in the section: (UB - LB) + 1. It is bound
              // to the opaque size expression of the private variable-length
              // array type before that type is emitted.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              // Allocate the private array and initialize its elements with
              // the reduction operation expression if DRD is set, otherwise
              // with the initializer of the private variable.
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Shift the private address by (base - section start) so that
              // indexing relative to the original base lands in the private
              // copy, then rebuild the indirections of the original type.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // Case 2: the item is a single array element. Strip nested
        // subscripts to find the underlying variable.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              // Use the reduction initializer when DRD provides one, or when
              // the private variable has no initializer of its own.
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              // Shift the private address by (base - element) so that the
              // original subscript applied to the result lands in the private
              // copy.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        // Case 3: the item is a plain variable.
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Case 3a: the private copy has array type.
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          // The generator also updates OriginalAddr (captured by reference)
          // to the address cast to the LHS helper's type; that value is used
          // by the aggregate initialization below.
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              // Bind the opaque size expression of the private
              // variable-length array type to the result of getTypeSize() for
              // the original variable's (non-reference) type.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Case 3b: the private copy is not an array.
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          // NOTE(review): OriginalAddr is captured by value here, so this
          // relies on the LHS generator above having already run and stored
          // the address - confirm that addPrivate() invokes its generator
          // immediately.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                // Use the reduction initializer when DRD provides one, or
                // when the private variable has no initializer of its own.
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}
1124 
1125 void CodeGenFunction::EmitOMPReductionClauseFinal(
1126     const OMPExecutableDirective &D) {
1127   if (!HaveInsertPoint())
1128     return;
1129   llvm::SmallVector<const Expr *, 8> Privates;
1130   llvm::SmallVector<const Expr *, 8> LHSExprs;
1131   llvm::SmallVector<const Expr *, 8> RHSExprs;
1132   llvm::SmallVector<const Expr *, 8> ReductionOps;
1133   bool HasAtLeastOneReduction = false;
1134   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1135     HasAtLeastOneReduction = true;
1136     Privates.append(C->privates().begin(), C->privates().end());
1137     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1138     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1139     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1140   }
1141   if (HasAtLeastOneReduction) {
1142     // Emit nowait reduction if nowait clause is present or directive is a
1143     // parallel directive (it always has implicit barrier).
1144     CGM.getOpenMPRuntime().emitReduction(
1145         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1146         D.getSingleClause<OMPNowaitClause>() ||
1147             isOpenMPParallelDirective(D.getDirectiveKind()) ||
1148             D.getDirectiveKind() == OMPD_simd,
1149         D.getDirectiveKind() == OMPD_simd);
1150   }
1151 }
1152 
1153 static void emitPostUpdateForReductionClause(
1154     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1155     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1156   if (!CGF.HaveInsertPoint())
1157     return;
1158   llvm::BasicBlock *DoneBB = nullptr;
1159   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1160     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1161       if (!DoneBB) {
1162         if (auto *Cond = CondGen(CGF)) {
1163           // If the first post-update expression is found, emit conditional
1164           // block if it was requested.
1165           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1166           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1167           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1168           CGF.EmitBlock(ThenBB);
1169         }
1170       }
1171       CGF.EmitIgnoredExpr(PostUpdate);
1172     }
1173   }
1174   if (DoneBB)
1175     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1176 }
1177 
1178 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
1179                                            const OMPExecutableDirective &S,
1180                                            OpenMPDirectiveKind InnermostKind,
1181                                            const RegionCodeGenTy &CodeGen) {
1182   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
1183   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
1184       emitParallelOrTeamsOutlinedFunction(S,
1185           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1186   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1187     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1188     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1189                                          /*IgnoreResultAssign*/ true);
1190     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1191         CGF, NumThreads, NumThreadsClause->getLocStart());
1192   }
1193   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1194     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1195     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1196         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1197   }
1198   const Expr *IfCond = nullptr;
1199   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1200     if (C->getNameModifier() == OMPD_unknown ||
1201         C->getNameModifier() == OMPD_parallel) {
1202       IfCond = C->getCondition();
1203       break;
1204     }
1205   }
1206 
1207   OMPLexicalScope Scope(CGF, S);
1208   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1209   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1210   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1211                                               CapturedVars, IfCond);
1212 }
1213 
1214 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1215   // Emit parallel region as a standalone region.
1216   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1217     OMPPrivateScope PrivateScope(CGF);
1218     bool Copyins = CGF.EmitOMPCopyinClause(S);
1219     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1220     if (Copyins) {
1221       // Emit implicit barrier to synchronize threads and avoid data races on
1222       // propagation master's thread values of threadprivate variables to local
1223       // instances of that variables of all other implicit threads.
1224       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1225           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1226           /*ForceSimpleCall=*/true);
1227     }
1228     CGF.EmitOMPPrivateClause(S, PrivateScope);
1229     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1230     (void)PrivateScope.Privatize();
1231     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1232     CGF.EmitOMPReductionClauseFinal(S);
1233   };
1234   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
1235   emitPostUpdateForReductionClause(
1236       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1237 }
1238 
1239 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1240                                       JumpDest LoopExit) {
1241   RunCleanupsScope BodyScope(*this);
1242   // Update counters values on current iteration.
1243   for (auto I : D.updates()) {
1244     EmitIgnoredExpr(I);
1245   }
1246   // Update the linear variables.
1247   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1248     for (auto *U : C->updates())
1249       EmitIgnoredExpr(U);
1250   }
1251 
1252   // On a continue in the body, jump to the end.
1253   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1254   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1255   // Emit loop body.
1256   EmitStmt(D.getBody());
1257   // The end (updates/cleanups).
1258   EmitBlock(Continue.getBlock());
1259   BreakContinueStack.pop_back();
1260 }
1261 
1262 void CodeGenFunction::EmitOMPInnerLoop(
1263     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1264     const Expr *IncExpr,
1265     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1266     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1267   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1268 
1269   // Start the loop with a block that tests the condition.
1270   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1271   EmitBlock(CondBlock);
1272   LoopStack.push(CondBlock);
1273 
1274   // If there are any cleanups between here and the loop-exit scope,
1275   // create a block to stage a loop exit along.
1276   auto ExitBlock = LoopExit.getBlock();
1277   if (RequiresCleanup)
1278     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1279 
1280   auto LoopBody = createBasicBlock("omp.inner.for.body");
1281 
1282   // Emit condition.
1283   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1284   if (ExitBlock != LoopExit.getBlock()) {
1285     EmitBlock(ExitBlock);
1286     EmitBranchThroughCleanup(LoopExit);
1287   }
1288 
1289   EmitBlock(LoopBody);
1290   incrementProfileCounter(&S);
1291 
1292   // Create a block for the increment.
1293   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1294   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1295 
1296   BodyGen(*this);
1297 
1298   // Emit "IV = IV + 1" and a back-edge to the condition block.
1299   EmitBlock(Continue.getBlock());
1300   EmitIgnoredExpr(IncExpr);
1301   PostIncGen(*this);
1302   BreakContinueStack.pop_back();
1303   EmitBranch(CondBlock);
1304   LoopStack.pop();
1305   // Emit the fall-through block.
1306   EmitBlock(LoopExit.getBlock());
1307 }
1308 
1309 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1310   if (!HaveInsertPoint())
1311     return;
1312   // Emit inits for the linear variables.
1313   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1314     for (auto *Init : C->inits()) {
1315       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1316       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1317         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1318         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1319         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1320                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1321                         VD->getInit()->getType(), VK_LValue,
1322                         VD->getInit()->getExprLoc());
1323         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1324                                                 VD->getType()),
1325                        /*capturedByInit=*/false);
1326         EmitAutoVarCleanups(Emission);
1327       } else
1328         EmitVarDecl(*VD);
1329     }
1330     // Emit the linear steps for the linear clauses.
1331     // If a step is not constant, it is pre-calculated before the loop.
1332     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1333       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1334         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1335         // Emit calculation of the linear step.
1336         EmitIgnoredExpr(CS);
1337       }
1338   }
1339 }
1340 
1341 void CodeGenFunction::EmitOMPLinearClauseFinal(
1342     const OMPLoopDirective &D,
1343     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1344   if (!HaveInsertPoint())
1345     return;
1346   llvm::BasicBlock *DoneBB = nullptr;
1347   // Emit the final values of the linear variables.
1348   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1349     auto IC = C->varlist_begin();
1350     for (auto *F : C->finals()) {
1351       if (!DoneBB) {
1352         if (auto *Cond = CondGen(*this)) {
1353           // If the first post-update expression is found, emit conditional
1354           // block if it was requested.
1355           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1356           DoneBB = createBasicBlock(".omp.linear.pu.done");
1357           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1358           EmitBlock(ThenBB);
1359         }
1360       }
1361       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1362       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1363                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1364                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1365       Address OrigAddr = EmitLValue(&DRE).getAddress();
1366       CodeGenFunction::OMPPrivateScope VarScope(*this);
1367       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1368       (void)VarScope.Privatize();
1369       EmitIgnoredExpr(F);
1370       ++IC;
1371     }
1372     if (auto *PostUpdate = C->getPostUpdateExpr())
1373       EmitIgnoredExpr(PostUpdate);
1374   }
1375   if (DoneBB)
1376     EmitBlock(DoneBB, /*IsFinished=*/true);
1377 }
1378 
1379 static void emitAlignedClause(CodeGenFunction &CGF,
1380                               const OMPExecutableDirective &D) {
1381   if (!CGF.HaveInsertPoint())
1382     return;
1383   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1384     unsigned ClauseAlignment = 0;
1385     if (auto AlignmentExpr = Clause->getAlignment()) {
1386       auto AlignmentCI =
1387           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1388       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1389     }
1390     for (auto E : Clause->varlists()) {
1391       unsigned Alignment = ClauseAlignment;
1392       if (Alignment == 0) {
1393         // OpenMP [2.8.1, Description]
1394         // If no optional parameter is specified, implementation-defined default
1395         // alignments for SIMD instructions on the target platforms are assumed.
1396         Alignment =
1397             CGF.getContext()
1398                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1399                     E->getType()->getPointeeType()))
1400                 .getQuantity();
1401       }
1402       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1403              "alignment is not power of 2");
1404       if (Alignment != 0) {
1405         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1406         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1407       }
1408     }
1409   }
1410 }
1411 
1412 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1413     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1414   if (!HaveInsertPoint())
1415     return;
1416   auto I = S.private_counters().begin();
1417   for (auto *E : S.counters()) {
1418     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1419     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1420     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1421       // Emit var without initialization.
1422       if (!LocalDeclMap.count(PrivateVD)) {
1423         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1424         EmitAutoVarCleanups(VarEmission);
1425       }
1426       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1427                       /*RefersToEnclosingVariableOrCapture=*/false,
1428                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1429       return EmitLValue(&DRE).getAddress();
1430     });
1431     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1432         VD->hasGlobalStorage()) {
1433       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1434         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1435                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1436                         E->getType(), VK_LValue, E->getExprLoc());
1437         return EmitLValue(&DRE).getAddress();
1438       });
1439     }
1440     ++I;
1441   }
1442 }
1443 
1444 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1445                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1446                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1447   if (!CGF.HaveInsertPoint())
1448     return;
1449   {
1450     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1451     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1452     (void)PreCondScope.Privatize();
1453     // Get initial values of real counters.
1454     for (auto I : S.inits()) {
1455       CGF.EmitIgnoredExpr(I);
1456     }
1457   }
1458   // Check that loop is executed at least one time.
1459   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1460 }
1461 
1462 void CodeGenFunction::EmitOMPLinearClause(
1463     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1464   if (!HaveInsertPoint())
1465     return;
1466   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1467   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1468     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1469     for (auto *C : LoopDirective->counters()) {
1470       SIMDLCVs.insert(
1471           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1472     }
1473   }
1474   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1475     auto CurPrivate = C->privates().begin();
1476     for (auto *E : C->varlists()) {
1477       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1478       auto *PrivateVD =
1479           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1480       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1481         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1482           // Emit private VarDecl with copy init.
1483           EmitVarDecl(*PrivateVD);
1484           return GetAddrOfLocalVar(PrivateVD);
1485         });
1486         assert(IsRegistered && "linear var already registered as private");
1487         // Silence the warning about unused variable.
1488         (void)IsRegistered;
1489       } else
1490         EmitVarDecl(*PrivateVD);
1491       ++CurPrivate;
1492     }
1493   }
1494 }
1495 
1496 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1497                                      const OMPExecutableDirective &D,
1498                                      bool IsMonotonic) {
1499   if (!CGF.HaveInsertPoint())
1500     return;
1501   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1502     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1503                                  /*ignoreResult=*/true);
1504     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1505     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1506     // In presence of finite 'safelen', it may be unsafe to mark all
1507     // the memory instructions parallel, because loop-carried
1508     // dependences of 'safelen' iterations are possible.
1509     if (!IsMonotonic)
1510       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1511   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1512     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1513                                  /*ignoreResult=*/true);
1514     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1515     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1516     // In presence of finite 'safelen', it may be unsafe to mark all
1517     // the memory instructions parallel, because loop-carried
1518     // dependences of 'safelen' iterations are possible.
1519     CGF.LoopStack.setParallel(false);
1520   }
1521 }
1522 
1523 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1524                                       bool IsMonotonic) {
1525   // Walk clauses and process safelen/lastprivate.
1526   LoopStack.setParallel(!IsMonotonic);
1527   LoopStack.setVectorizeEnable(true);
1528   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1529 }
1530 
1531 void CodeGenFunction::EmitOMPSimdFinal(
1532     const OMPLoopDirective &D,
1533     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1534   if (!HaveInsertPoint())
1535     return;
1536   llvm::BasicBlock *DoneBB = nullptr;
1537   auto IC = D.counters().begin();
1538   auto IPC = D.private_counters().begin();
1539   for (auto F : D.finals()) {
1540     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1541     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1542     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1543     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1544         OrigVD->hasGlobalStorage() || CED) {
1545       if (!DoneBB) {
1546         if (auto *Cond = CondGen(*this)) {
1547           // If the first post-update expression is found, emit conditional
1548           // block if it was requested.
1549           auto *ThenBB = createBasicBlock(".omp.final.then");
1550           DoneBB = createBasicBlock(".omp.final.done");
1551           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1552           EmitBlock(ThenBB);
1553         }
1554       }
1555       Address OrigAddr = Address::invalid();
1556       if (CED)
1557         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1558       else {
1559         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1560                         /*RefersToEnclosingVariableOrCapture=*/false,
1561                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1562         OrigAddr = EmitLValue(&DRE).getAddress();
1563       }
1564       OMPPrivateScope VarScope(*this);
1565       VarScope.addPrivate(OrigVD,
1566                           [OrigAddr]() -> Address { return OrigAddr; });
1567       (void)VarScope.Privatize();
1568       EmitIgnoredExpr(F);
1569     }
1570     ++IC;
1571     ++IPC;
1572   }
1573   if (DoneBB)
1574     EmitBlock(DoneBB, /*IsFinished=*/true);
1575 }
1576 
/// \brief Emit code for the 'simd' directive \a S.
///
/// The loop is emitted inline (via emitInlinedDirective with OMPD_simd) by
/// the CodeGen lambda below: an optional precondition check, the helper
/// variables, the privatized loop itself and the final updates of counters,
/// lastprivate, reduction and linear variables.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // Shape of the emitted code:
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Join block of the precondition check; stays null when the precondition
    // constant-folds and therefore no branch is emitted.
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      // Precondition is a known constant: a false value means there is
      // nothing to emit at all.
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Stage the simd loop attributes (IsMonotonic is left to the default
    // argument declared elsewhere).
    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    {
      // All privatization below is registered in LoopScope and stays active
      // until the end of this block, i.e. through the loop and the
      // counter/lastprivate/reduction finalization.
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      // Emit the loop itself; the last callback (post-increment action) is
      // intentionally empty for a plain simd loop.
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // The condition generators below return null, so the final updates are
      // emitted unconditionally.
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    // Linear finals are emitted after LoopScope has been destroyed.
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
1658 
/// \brief Emit the outer (dispatch) loop that repeatedly obtains a range of
/// iterations and runs the inner loop over it.
///
/// \param DynamicOrOrdered When true, the loop condition is the result of
/// the runtime "next" call (emitForNext); when false, the bounds are updated
/// inline (ensure-upper-bound, init, next lower/upper bound expressions of
/// \a S) and the runtime is notified on exit via emitForStaticFinish.
/// \param IsMonotonic Forwarded to the loop-attribute setup: the loop is
/// marked parallel only when this is false.
/// \param S The loop directive being emitted.
/// \param LoopScope Private scope of the loop; only queried for whether
/// cleanups are required.
/// \param Ordered When true, the ordered-iteration-end runtime call is
/// emitted after each inner-loop increment.
/// \param LB, UB, ST, IL Addresses passed to the runtime "next" call.
/// \param Chunk Not referenced in this body.
void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Width and signedness of the iteration variable, as required by the
  // runtime entry points.
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    // Ask the runtime for the next range; its result is the loop condition.
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    // A staging block was created above: route the exit through the cleanups.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  // Inner loop over the current range. The second callback runs after the
  // increment and only emits something for ordered loops.
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  // Back edge to the condition block.
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());

}
1749 
1750 void CodeGenFunction::EmitOMPForOuterLoop(
1751     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1752     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1753     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1754   auto &RT = CGM.getOpenMPRuntime();
1755 
1756   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1757   const bool DynamicOrOrdered =
1758       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1759 
1760   assert((Ordered ||
1761           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1762                                  /*Chunked=*/Chunk != nullptr)) &&
1763          "static non-chunked schedule does not need outer loop");
1764 
1765   // Emit outer loop.
1766   //
1767   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1768   // When schedule(dynamic,chunk_size) is specified, the iterations are
1769   // distributed to threads in the team in chunks as the threads request them.
1770   // Each thread executes a chunk of iterations, then requests another chunk,
1771   // until no chunks remain to be distributed. Each chunk contains chunk_size
1772   // iterations, except for the last chunk to be distributed, which may have
1773   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1774   //
1775   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1776   // to threads in the team in chunks as the executing threads request them.
1777   // Each thread executes a chunk of iterations, then requests another chunk,
1778   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1779   // each chunk is proportional to the number of unassigned iterations divided
1780   // by the number of threads in the team, decreasing to 1. For a chunk_size
1781   // with value k (greater than 1), the size of each chunk is determined in the
1782   // same way, with the restriction that the chunks do not contain fewer than k
1783   // iterations (except for the last chunk to be assigned, which may have fewer
1784   // than k iterations).
1785   //
1786   // When schedule(auto) is specified, the decision regarding scheduling is
1787   // delegated to the compiler and/or runtime system. The programmer gives the
1788   // implementation the freedom to choose any possible mapping of iterations to
1789   // threads in the team.
1790   //
1791   // When schedule(runtime) is specified, the decision regarding scheduling is
1792   // deferred until run time, and the schedule and chunk size are taken from the
1793   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1794   // implementation defined
1795   //
1796   // while(__kmpc_dispatch_next(&LB, &UB)) {
1797   //   idx = LB;
1798   //   while (idx <= UB) { BODY; ++idx;
1799   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1800   //   } // inner loop
1801   // }
1802   //
1803   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1804   // When schedule(static, chunk_size) is specified, iterations are divided into
1805   // chunks of size chunk_size, and the chunks are assigned to the threads in
1806   // the team in a round-robin fashion in the order of the thread number.
1807   //
1808   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1809   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1810   //   LB = LB + ST;
1811   //   UB = UB + ST;
1812   // }
1813   //
1814 
1815   const Expr *IVExpr = S.getIterationVariable();
1816   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1817   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1818 
1819   if (DynamicOrOrdered) {
1820     llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
1821     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1822                            IVSigned, Ordered, UBVal, Chunk);
1823   } else {
1824     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1825                          Ordered, IL, LB, UB, ST, Chunk);
1826   }
1827 
1828   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
1829                    ST, IL, Chunk);
1830 }
1831 
1832 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1833     OpenMPDistScheduleClauseKind ScheduleKind,
1834     const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
1835     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1836 
1837   auto &RT = CGM.getOpenMPRuntime();
1838 
1839   // Emit outer loop.
1840   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1841   // dynamic
1842   //
1843 
1844   const Expr *IVExpr = S.getIterationVariable();
1845   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1846   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1847 
1848   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
1849                               IVSize, IVSigned, /* Ordered = */ false,
1850                               IL, LB, UB, ST, Chunk);
1851 
1852   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
1853                    S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
1854 }
1855 
1856 /// \brief Emit a helper variable and return corresponding lvalue.
1857 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1858                                const DeclRefExpr *Helper) {
1859   auto VDecl = cast<VarDecl>(Helper->getDecl());
1860   CGF.EmitVarDecl(*VDecl);
1861   return CGF.EmitLValue(Helper);
1862 }
1863 
1864 namespace {
1865   struct ScheduleKindModifiersTy {
1866     OpenMPScheduleClauseKind Kind;
1867     OpenMPScheduleClauseModifier M1;
1868     OpenMPScheduleClauseModifier M2;
1869     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
1870                             OpenMPScheduleClauseModifier M1,
1871                             OpenMPScheduleClauseModifier M2)
1872         : Kind(Kind), M1(M1), M2(M2) {}
1873   };
1874 } // namespace
1875 
1876 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
1877   // Emit the loop iteration variable.
1878   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
1879   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
1880   EmitVarDecl(*IVDecl);
1881 
1882   // Emit the iterations count variable.
1883   // If it is not a variable, Sema decided to calculate iterations count on each
1884   // iteration (e.g., it is foldable into a constant).
1885   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1886     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1887     // Emit calculation of the iterations count.
1888     EmitIgnoredExpr(S.getCalcLastIteration());
1889   }
1890 
1891   auto &RT = CGM.getOpenMPRuntime();
1892 
1893   bool HasLastprivateClause;
1894   // Check pre-condition.
1895   {
1896     OMPLoopScope PreInitScope(*this, S);
1897     // Skip the entire loop if we don't meet the precondition.
1898     // If the condition constant folds and can be elided, avoid emitting the
1899     // whole loop.
1900     bool CondConstant;
1901     llvm::BasicBlock *ContBlock = nullptr;
1902     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1903       if (!CondConstant)
1904         return false;
1905     } else {
1906       auto *ThenBlock = createBasicBlock("omp.precond.then");
1907       ContBlock = createBasicBlock("omp.precond.end");
1908       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
1909                   getProfileCount(&S));
1910       EmitBlock(ThenBlock);
1911       incrementProfileCounter(&S);
1912     }
1913 
1914     llvm::DenseSet<const Expr *> EmittedFinals;
1915     emitAlignedClause(*this, S);
1916     EmitOMPLinearClauseInit(S);
1917     // Emit helper vars inits.
1918     LValue LB =
1919         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
1920     LValue UB =
1921         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
1922     LValue ST =
1923         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
1924     LValue IL =
1925         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
1926 
1927     // Emit 'then' code.
1928     {
1929       OMPPrivateScope LoopScope(*this);
1930       if (EmitOMPFirstprivateClause(S, LoopScope)) {
1931         // Emit implicit barrier to synchronize threads and avoid data races on
1932         // initialization of firstprivate variables and post-update of
1933         // lastprivate variables.
1934         CGM.getOpenMPRuntime().emitBarrierCall(
1935             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1936             /*ForceSimpleCall=*/true);
1937       }
1938       EmitOMPPrivateClause(S, LoopScope);
1939       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
1940       EmitOMPReductionClauseInit(S, LoopScope);
1941       EmitOMPPrivateLoopCounters(S, LoopScope);
1942       EmitOMPLinearClause(S, LoopScope);
1943       (void)LoopScope.Privatize();
1944 
1945       // Detect the loop schedule kind and chunk.
1946       llvm::Value *Chunk = nullptr;
1947       OpenMPScheduleTy ScheduleKind;
1948       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
1949         ScheduleKind.Schedule = C->getScheduleKind();
1950         ScheduleKind.M1 = C->getFirstScheduleModifier();
1951         ScheduleKind.M2 = C->getSecondScheduleModifier();
1952         if (const auto *Ch = C->getChunkSize()) {
1953           Chunk = EmitScalarExpr(Ch);
1954           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
1955                                        S.getIterationVariable()->getType(),
1956                                        S.getLocStart());
1957         }
1958       }
1959       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1960       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1961       const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
1962       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
1963       // If the static schedule kind is specified or if the ordered clause is
1964       // specified, and if no monotonic modifier is specified, the effect will
1965       // be as if the monotonic modifier was specified.
1966       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
1967                                 /* Chunked */ Chunk != nullptr) &&
1968           !Ordered) {
1969         if (isOpenMPSimdDirective(S.getDirectiveKind()))
1970           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
1971         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1972         // When no chunk_size is specified, the iteration space is divided into
1973         // chunks that are approximately equal in size, and at most one chunk is
1974         // distributed to each thread. Note that the size of the chunks is
1975         // unspecified in this case.
1976         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
1977                              IVSize, IVSigned, Ordered,
1978                              IL.getAddress(), LB.getAddress(),
1979                              UB.getAddress(), ST.getAddress());
1980         auto LoopExit =
1981             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
1982         // UB = min(UB, GlobalUB);
1983         EmitIgnoredExpr(S.getEnsureUpperBound());
1984         // IV = LB;
1985         EmitIgnoredExpr(S.getInit());
1986         // while (idx <= UB) { BODY; ++idx; }
1987         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1988                          S.getInc(),
1989                          [&S, LoopExit](CodeGenFunction &CGF) {
1990                            CGF.EmitOMPLoopBody(S, LoopExit);
1991                            CGF.EmitStopPoint(&S);
1992                          },
1993                          [](CodeGenFunction &) {});
1994         EmitBlock(LoopExit.getBlock());
1995         // Tell the runtime we are done.
1996         RT.emitForStaticFinish(*this, S.getLocStart());
1997       } else {
1998         const bool IsMonotonic =
1999             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2000             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2001             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2002             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2003         // Emit the outer loop, which requests its work chunk [LB..UB] from
2004         // runtime and runs the inner loop to process it.
2005         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2006                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
2007                             IL.getAddress(), Chunk);
2008       }
2009       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2010         EmitOMPSimdFinal(S,
2011                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2012                            return CGF.Builder.CreateIsNotNull(
2013                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2014                          });
2015       }
2016       EmitOMPReductionClauseFinal(S);
2017       // Emit post-update of the reduction variables if IsLastIter != 0.
2018       emitPostUpdateForReductionClause(
2019           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2020             return CGF.Builder.CreateIsNotNull(
2021                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2022           });
2023       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2024       if (HasLastprivateClause)
2025         EmitOMPLastprivateClauseFinal(
2026             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2027             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2028     }
2029     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2030       return CGF.Builder.CreateIsNotNull(
2031           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2032     });
2033     // We're now done with the loop, so jump to the continuation block.
2034     if (ContBlock) {
2035       EmitBranch(ContBlock);
2036       EmitBlock(ContBlock, true);
2037     }
2038   }
2039   return HasLastprivateClause;
2040 }
2041 
2042 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2043   bool HasLastprivates = false;
2044   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2045                                           PrePostActionTy &) {
2046     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
2047   };
2048   {
2049     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2050     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2051                                                 S.hasCancel());
2052   }
2053 
2054   // Emit an implicit barrier at the end.
2055   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2056     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2057   }
2058 }
2059 
2060 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2061   bool HasLastprivates = false;
2062   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2063                                           PrePostActionTy &) {
2064     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
2065   };
2066   {
2067     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2068     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2069   }
2070 
2071   // Emit an implicit barrier at the end.
2072   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2073     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2074   }
2075 }
2076 
2077 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2078                                 const Twine &Name,
2079                                 llvm::Value *Init = nullptr) {
2080   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2081   if (Init)
2082     CGF.EmitScalarInit(Init, LVal);
2083   return LVal;
2084 }
2085 
/// Emit the body shared by the 'sections' and 'parallel sections' directives.
///
/// The sections are lowered to a statically scheduled loop over a 32-bit
/// signed counter whose body is a switch: case <i> runs the i-th section.
/// When the captured statement is a compound statement, each of its children
/// is one section; otherwise the whole statement is the single section 0.
///
/// Firstprivate, private, lastprivate and reduction clauses of \p S are
/// handled here. The trailing barrier of the directive itself is NOT emitted
/// here, except for the lastprivate + 'nowait' case at the end.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // Null when the captured statement is not a compound statement.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound: index of the last section (0 for a single section).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Bind opaque expressions to IV and UB so that the synthesized condition
    // and increment expressions below can refer to them.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop: IV <= UB.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter: ++IV.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      // The exit block doubles as the default destination of the switch.
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child of the compound statement.
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // The captured statement is the one and only section.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    // Remember whether lastprivates exist; this is also reported to the
    // enclosing function through the by-reference capture.
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' carry a cancel flag; any other
  // directive kind passed here is treated as non-cancellable.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads after the final copy of the
    // lastprivate variables emitted above.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2220 
2221 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2222   {
2223     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2224     EmitSections(S);
2225   }
2226   // Emit an implicit barrier at the end.
2227   if (!S.getSingleClause<OMPNowaitClause>()) {
2228     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2229                                            OMPD_sections);
2230   }
2231 }
2232 
2233 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2234   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2235     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2236   };
2237   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2238   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2239                                               S.hasCancel());
2240 }
2241 
2242 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2243   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2244   llvm::SmallVector<const Expr *, 8> DestExprs;
2245   llvm::SmallVector<const Expr *, 8> SrcExprs;
2246   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2247   // Check if there are any 'copyprivate' clauses associated with this
2248   // 'single' construct.
2249   // Build a list of copyprivate variables along with helper expressions
2250   // (<source>, <destination>, <destination>=<source> expressions)
2251   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2252     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2253     DestExprs.append(C->destination_exprs().begin(),
2254                      C->destination_exprs().end());
2255     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2256     AssignmentOps.append(C->assignment_ops().begin(),
2257                          C->assignment_ops().end());
2258   }
2259   // Emit code for 'single' region along with 'copyprivate' clauses
2260   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2261     Action.Enter(CGF);
2262     OMPPrivateScope SingleScope(CGF);
2263     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2264     CGF.EmitOMPPrivateClause(S, SingleScope);
2265     (void)SingleScope.Privatize();
2266     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2267   };
2268   {
2269     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2270     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2271                                             CopyprivateVars, DestExprs,
2272                                             SrcExprs, AssignmentOps);
2273   }
2274   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2275   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2276   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2277     CGM.getOpenMPRuntime().emitBarrierCall(
2278         *this, S.getLocStart(),
2279         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2280   }
2281 }
2282 
2283 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2284   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2285     Action.Enter(CGF);
2286     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2287   };
2288   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2289   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2290 }
2291 
2292 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2293   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2294     Action.Enter(CGF);
2295     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2296   };
2297   Expr *Hint = nullptr;
2298   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2299     Hint = HintClause->getHint();
2300   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2301   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2302                                             S.getDirectiveName().getAsString(),
2303                                             CodeGen, S.getLocStart(), Hint);
2304 }
2305 
2306 void CodeGenFunction::EmitOMPParallelForDirective(
2307     const OMPParallelForDirective &S) {
2308   // Emit directive as a combined directive that consists of two implicit
2309   // directives: 'parallel' with 'for' directive.
2310   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2311     CGF.EmitOMPWorksharingLoop(S);
2312   };
2313   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
2314 }
2315 
2316 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2317     const OMPParallelForSimdDirective &S) {
2318   // Emit directive as a combined directive that consists of two implicit
2319   // directives: 'parallel' with 'for' directive.
2320   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2321     CGF.EmitOMPWorksharingLoop(S);
2322   };
2323   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
2324 }
2325 
2326 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2327     const OMPParallelSectionsDirective &S) {
2328   // Emit directive as a combined directive that consists of two implicit
2329   // directives: 'parallel' with 'sections' directive.
2330   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2331     CGF.EmitSections(S);
2332   };
2333   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
2334 }
2335 
/// Common code generation for task-based directives.
///
/// Collects the clause information of \p S into \p Data, emits the outlined
/// task function and finally lets \p TaskGen emit the task call.
///
/// \param S The task-based directive; its associated statement must be a
/// CapturedStmt.
/// \param BodyGen Emits the task body inside the outlined function. It is
/// invoked after the private copies have been mapped and after the region's
/// pre-action has been entered.
/// \param TaskGen Invoked in the current function with the outlined function
/// and \p Data.
/// \param Data [in,out] Data.Tied is read and Data.NumberOfParts is passed to
/// the outlining routine; the 'final' state, priority, private / firstprivate /
/// lastprivate lists and dependences are filled in here.
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  // Pick the parameters of the captured declaration by position: index 0,
  // index 1 (part id) and index 4 (task descriptor).
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    // Runtime currently does not support codegen for priority clause argument.
    // TODO: Add codegen for priority clause arg when runtime lib support it.
    // NOTE(review): the priority expression itself is not emitted; the pointer
    // is handed to setInt, so presumably only its presence is recorded --
    // confirm against the declaration of OMPTaskDataTy::Priority.
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(Prio);
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  // Canonical declarations already recorded; used to drop duplicates.
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    // IRef walks the original variable references in lock-step with the
    // private copies.
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  // Start over for firstprivates. The set is not cleared again before the
  // lastprivate loop below, so a variable already recorded as firstprivate is
  // not recorded a second time as lastprivate.
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  // Maps the declaration behind each destination expression to the reference
  // to the original variable.
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      // The destination mapping is recorded for every lastprivate entry, even
      // when the variable was already seen above.
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  // Body of the outlined task function.
  auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      // Parameter 3 of the captured declaration is loaded as the function
      // that is called below to obtain the addresses of the private copies;
      // parameter 2 is loaded as the pointer passed to it as first argument.
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      // For each private, firstprivate and lastprivate variable (in that
      // order) create a pointer-typed temporary and pass its address as an
      // extra call argument.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      // Map every lastprivate destination declaration to the address of the
      // original variable. This is done before the private copies of the
      // original variables are registered below.
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      // Replace each privatized variable with the pointer loaded from its
      // temporary, using the declared alignment of the variable.
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  // The lexical scope for the directive is opened only after the outlined
  // function has been emitted and covers the emission of the task call.
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
2485 
2486 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2487   // Emit outlined function for task construct.
2488   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2489   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2490   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2491   const Expr *IfCond = nullptr;
2492   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2493     if (C->getNameModifier() == OMPD_unknown ||
2494         C->getNameModifier() == OMPD_task) {
2495       IfCond = C->getCondition();
2496       break;
2497     }
2498   }
2499 
2500   OMPTaskDataTy Data;
2501   // Check if we should emit tied or untied task.
2502   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2503   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2504     CGF.EmitStmt(CS->getCapturedStmt());
2505   };
2506   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2507                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2508                             const OMPTaskDataTy &Data) {
2509     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2510                                             SharedsTy, CapturedStruct, IfCond,
2511                                             Data);
2512   };
2513   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2514 }
2515 
2516 void CodeGenFunction::EmitOMPTaskyieldDirective(
2517     const OMPTaskyieldDirective &S) {
2518   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2519 }
2520 
2521 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2522   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2523 }
2524 
2525 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2526   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2527 }
2528 
2529 void CodeGenFunction::EmitOMPTaskgroupDirective(
2530     const OMPTaskgroupDirective &S) {
2531   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2532     Action.Enter(CGF);
2533     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2534   };
2535   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2536   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2537 }
2538 
2539 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2540   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2541     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2542       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2543                                 FlushClause->varlist_end());
2544     }
2545     return llvm::None;
2546   }(), S.getLocStart());
2547 }
2548 
/// Emit the loop associated with a 'distribute' directive.
///
/// Emission order: the iteration variable, the iteration-count variable (when
/// it is a real variable), the loop precondition, the helper variables (lower
/// bound, upper bound, stride, is-last-iteration flag) and privatized loop
/// counters, and finally the loop itself. A static non-chunked schedule is
/// emitted as a single inner loop bracketed by runtime init/finish calls; any
/// other schedule goes through EmitOMPDistributeOuterLoop.
///
/// Nothing after the precondition is emitted when the precondition
/// constant-folds to false.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant, in which case no
    // branch and no continuation block are needed.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      // Without a dist_schedule clause the kind stays 'unknown' and there is
      // no chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // Evaluate the chunk size and convert it to the type of the
          // iteration variable.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      // Width in bits and signedness of the iteration variable, as passed to
      // the runtime init call.
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, /* Ordered = */ false,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        // Jump destination handed to the loop body; its block is emitted
        // right after the inner loop.
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
2664 
2665 void CodeGenFunction::EmitOMPDistributeDirective(
2666     const OMPDistributeDirective &S) {
2667   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2668     CGF.EmitOMPDistributeLoop(S);
2669   };
2670   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2671   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2672                                               false);
2673 }
2674 
2675 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
2676                                                    const CapturedStmt *S) {
2677   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
2678   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
2679   CGF.CapturedStmtInfo = &CapStmtInfo;
2680   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
2681   Fn->addFnAttr(llvm::Attribute::NoInline);
2682   return Fn;
2683 }
2684 
2685 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
2686   if (!S.getAssociatedStmt())
2687     return;
2688   auto *C = S.getSingleClause<OMPSIMDClause>();
2689   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
2690                                  PrePostActionTy &Action) {
2691     if (C) {
2692       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2693       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2694       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
2695       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
2696       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
2697     } else {
2698       Action.Enter(CGF);
2699       CGF.EmitStmt(
2700           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2701     }
2702   };
2703   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2704   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
2705 }
2706 
2707 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
2708                                          QualType SrcType, QualType DestType,
2709                                          SourceLocation Loc) {
2710   assert(CGF.hasScalarEvaluationKind(DestType) &&
2711          "DestType must have scalar evaluation kind.");
2712   assert(!Val.isAggregate() && "Must be a scalar or complex.");
2713   return Val.isScalar()
2714              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
2715                                         Loc)
2716              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
2717                                                  DestType, Loc);
2718 }
2719 
2720 static CodeGenFunction::ComplexPairTy
2721 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
2722                       QualType DestType, SourceLocation Loc) {
2723   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
2724          "DestType must have complex evaluation kind.");
2725   CodeGenFunction::ComplexPairTy ComplexVal;
2726   if (Val.isScalar()) {
2727     // Convert the input element to the element type of the complex.
2728     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2729     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
2730                                               DestElementType, Loc);
2731     ComplexVal = CodeGenFunction::ComplexPairTy(
2732         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
2733   } else {
2734     assert(Val.isComplex() && "Must be a scalar or complex.");
2735     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
2736     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2737     ComplexVal.first = CGF.EmitScalarConversion(
2738         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
2739     ComplexVal.second = CGF.EmitScalarConversion(
2740         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
2741   }
2742   return ComplexVal;
2743 }
2744 
2745 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
2746                                   LValue LVal, RValue RVal) {
2747   if (LVal.isGlobalReg()) {
2748     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
2749   } else {
2750     CGF.EmitAtomicStore(RVal, LVal,
2751                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2752                                  : llvm::AtomicOrdering::Monotonic,
2753                         LVal.isVolatile(), /*IsInit=*/false);
2754   }
2755 }
2756 
2757 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
2758                                          QualType RValTy, SourceLocation Loc) {
2759   switch (getEvaluationKind(LVal.getType())) {
2760   case TEK_Scalar:
2761     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
2762                                *this, RVal, RValTy, LVal.getType(), Loc)),
2763                            LVal);
2764     break;
2765   case TEK_Complex:
2766     EmitStoreOfComplex(
2767         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
2768         /*isInit=*/false);
2769     break;
2770   case TEK_Aggregate:
2771     llvm_unreachable("Must be a scalar or complex.");
2772   }
2773 }
2774 
2775 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
2776                                   const Expr *X, const Expr *V,
2777                                   SourceLocation Loc) {
2778   // v = x;
2779   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
2780   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
2781   LValue XLValue = CGF.EmitLValue(X);
2782   LValue VLValue = CGF.EmitLValue(V);
2783   RValue Res = XLValue.isGlobalReg()
2784                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
2785                    : CGF.EmitAtomicLoad(
2786                          XLValue, Loc,
2787                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2788                                   : llvm::AtomicOrdering::Monotonic,
2789                          XLValue.isVolatile());
2790   // OpenMP, 2.12.6, atomic Construct
2791   // Any atomic construct with a seq_cst clause forces the atomically
2792   // performed operation to include an implicit flush operation without a
2793   // list.
2794   if (IsSeqCst)
2795     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2796   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
2797 }
2798 
2799 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
2800                                    const Expr *X, const Expr *E,
2801                                    SourceLocation Loc) {
2802   // x = expr;
2803   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
2804   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
2805   // OpenMP, 2.12.6, atomic Construct
2806   // Any atomic construct with a seq_cst clause forces the atomically
2807   // performed operation to include an implicit flush operation without a
2808   // list.
2809   if (IsSeqCst)
2810     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2811 }
2812 
2813 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
2814                                                 RValue Update,
2815                                                 BinaryOperatorKind BO,
2816                                                 llvm::AtomicOrdering AO,
2817                                                 bool IsXLHSInRHSPart) {
2818   auto &Context = CGF.CGM.getContext();
2819   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
2820   // expression is simple and atomic is allowed for the given type for the
2821   // target platform.
2822   if (BO == BO_Comma || !Update.isScalar() ||
2823       !Update.getScalarVal()->getType()->isIntegerTy() ||
2824       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
2825                         (Update.getScalarVal()->getType() !=
2826                          X.getAddress().getElementType())) ||
2827       !X.getAddress().getElementType()->isIntegerTy() ||
2828       !Context.getTargetInfo().hasBuiltinAtomic(
2829           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
2830     return std::make_pair(false, RValue::get(nullptr));
2831 
2832   llvm::AtomicRMWInst::BinOp RMWOp;
2833   switch (BO) {
2834   case BO_Add:
2835     RMWOp = llvm::AtomicRMWInst::Add;
2836     break;
2837   case BO_Sub:
2838     if (!IsXLHSInRHSPart)
2839       return std::make_pair(false, RValue::get(nullptr));
2840     RMWOp = llvm::AtomicRMWInst::Sub;
2841     break;
2842   case BO_And:
2843     RMWOp = llvm::AtomicRMWInst::And;
2844     break;
2845   case BO_Or:
2846     RMWOp = llvm::AtomicRMWInst::Or;
2847     break;
2848   case BO_Xor:
2849     RMWOp = llvm::AtomicRMWInst::Xor;
2850     break;
2851   case BO_LT:
2852     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2853                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
2854                                    : llvm::AtomicRMWInst::Max)
2855                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
2856                                    : llvm::AtomicRMWInst::UMax);
2857     break;
2858   case BO_GT:
2859     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2860                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
2861                                    : llvm::AtomicRMWInst::Min)
2862                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
2863                                    : llvm::AtomicRMWInst::UMin);
2864     break;
2865   case BO_Assign:
2866     RMWOp = llvm::AtomicRMWInst::Xchg;
2867     break;
2868   case BO_Mul:
2869   case BO_Div:
2870   case BO_Rem:
2871   case BO_Shl:
2872   case BO_Shr:
2873   case BO_LAnd:
2874   case BO_LOr:
2875     return std::make_pair(false, RValue::get(nullptr));
2876   case BO_PtrMemD:
2877   case BO_PtrMemI:
2878   case BO_LE:
2879   case BO_GE:
2880   case BO_EQ:
2881   case BO_NE:
2882   case BO_AddAssign:
2883   case BO_SubAssign:
2884   case BO_AndAssign:
2885   case BO_OrAssign:
2886   case BO_XorAssign:
2887   case BO_MulAssign:
2888   case BO_DivAssign:
2889   case BO_RemAssign:
2890   case BO_ShlAssign:
2891   case BO_ShrAssign:
2892   case BO_Comma:
2893     llvm_unreachable("Unsupported atomic update operation");
2894   }
2895   auto *UpdateVal = Update.getScalarVal();
2896   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
2897     UpdateVal = CGF.Builder.CreateIntCast(
2898         IC, X.getAddress().getElementType(),
2899         X.getType()->hasSignedIntegerRepresentation());
2900   }
2901   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
2902   return std::make_pair(true, RValue::get(Res));
2903 }
2904 
2905 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
2906     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
2907     llvm::AtomicOrdering AO, SourceLocation Loc,
2908     const llvm::function_ref<RValue(RValue)> &CommonGen) {
2909   // Update expressions are allowed to have the following forms:
2910   // x binop= expr; -> xrval + expr;
2911   // x++, ++x -> xrval + 1;
2912   // x--, --x -> xrval - 1;
2913   // x = x binop expr; -> xrval binop expr
2914   // x = expr Op x; - > expr binop xrval;
2915   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
2916   if (!Res.first) {
2917     if (X.isGlobalReg()) {
2918       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
2919       // 'xrval'.
2920       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
2921     } else {
2922       // Perform compare-and-swap procedure.
2923       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
2924     }
2925   }
2926   return Res;
2927 }
2928 
2929 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
2930                                     const Expr *X, const Expr *E,
2931                                     const Expr *UE, bool IsXLHSInRHSPart,
2932                                     SourceLocation Loc) {
2933   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2934          "Update expr in 'atomic update' must be a binary operator.");
2935   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2936   // Update expressions are allowed to have the following forms:
2937   // x binop= expr; -> xrval + expr;
2938   // x++, ++x -> xrval + 1;
2939   // x--, --x -> xrval - 1;
2940   // x = x binop expr; -> xrval binop expr
2941   // x = expr Op x; - > expr binop xrval;
2942   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
2943   LValue XLValue = CGF.EmitLValue(X);
2944   RValue ExprRValue = CGF.EmitAnyExpr(E);
2945   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2946                      : llvm::AtomicOrdering::Monotonic;
2947   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2948   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2949   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2950   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2951   auto Gen =
2952       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
2953         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2954         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2955         return CGF.EmitAnyExpr(UE);
2956       };
2957   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
2958       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2959   // OpenMP, 2.12.6, atomic Construct
2960   // Any atomic construct with a seq_cst clause forces the atomically
2961   // performed operation to include an implicit flush operation without a
2962   // list.
2963   if (IsSeqCst)
2964     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2965 }
2966 
2967 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
2968                             QualType SourceType, QualType ResType,
2969                             SourceLocation Loc) {
2970   switch (CGF.getEvaluationKind(ResType)) {
2971   case TEK_Scalar:
2972     return RValue::get(
2973         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
2974   case TEK_Complex: {
2975     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
2976     return RValue::getComplex(Res.first, Res.second);
2977   }
2978   case TEK_Aggregate:
2979     break;
2980   }
2981   llvm_unreachable("Must be a scalar or complex.");
2982 }
2983 
2984 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
2985                                      bool IsPostfixUpdate, const Expr *V,
2986                                      const Expr *X, const Expr *E,
2987                                      const Expr *UE, bool IsXLHSInRHSPart,
2988                                      SourceLocation Loc) {
2989   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
2990   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
2991   RValue NewVVal;
2992   LValue VLValue = CGF.EmitLValue(V);
2993   LValue XLValue = CGF.EmitLValue(X);
2994   RValue ExprRValue = CGF.EmitAnyExpr(E);
2995   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2996                      : llvm::AtomicOrdering::Monotonic;
2997   QualType NewVValType;
2998   if (UE) {
2999     // 'x' is updated with some additional value.
3000     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3001            "Update expr in 'atomic capture' must be a binary operator.");
3002     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3003     // Update expressions are allowed to have the following forms:
3004     // x binop= expr; -> xrval + expr;
3005     // x++, ++x -> xrval + 1;
3006     // x--, --x -> xrval - 1;
3007     // x = x binop expr; -> xrval binop expr
3008     // x = expr Op x; - > expr binop xrval;
3009     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3010     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3011     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3012     NewVValType = XRValExpr->getType();
3013     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3014     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3015                   IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
3016       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3017       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3018       RValue Res = CGF.EmitAnyExpr(UE);
3019       NewVVal = IsPostfixUpdate ? XRValue : Res;
3020       return Res;
3021     };
3022     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3023         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3024     if (Res.first) {
3025       // 'atomicrmw' instruction was generated.
3026       if (IsPostfixUpdate) {
3027         // Use old value from 'atomicrmw'.
3028         NewVVal = Res.second;
3029       } else {
3030         // 'atomicrmw' does not provide new value, so evaluate it using old
3031         // value of 'x'.
3032         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3033         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3034         NewVVal = CGF.EmitAnyExpr(UE);
3035       }
3036     }
3037   } else {
3038     // 'x' is simply rewritten with some 'expr'.
3039     NewVValType = X->getType().getNonReferenceType();
3040     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3041                                X->getType().getNonReferenceType(), Loc);
3042     auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
3043       NewVVal = XRValue;
3044       return ExprRValue;
3045     };
3046     // Try to perform atomicrmw xchg, otherwise simple exchange.
3047     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3048         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3049         Loc, Gen);
3050     if (Res.first) {
3051       // 'atomicrmw' instruction was generated.
3052       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3053     }
3054   }
3055   // Emit post-update store to 'v' of old/new 'x' value.
3056   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3057   // OpenMP, 2.12.6, atomic Construct
3058   // Any atomic construct with a seq_cst clause forces the atomically
3059   // performed operation to include an implicit flush operation without a
3060   // list.
3061   if (IsSeqCst)
3062     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3063 }
3064 
3065 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3066                               bool IsSeqCst, bool IsPostfixUpdate,
3067                               const Expr *X, const Expr *V, const Expr *E,
3068                               const Expr *UE, bool IsXLHSInRHSPart,
3069                               SourceLocation Loc) {
3070   switch (Kind) {
3071   case OMPC_read:
3072     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3073     break;
3074   case OMPC_write:
3075     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3076     break;
3077   case OMPC_unknown:
3078   case OMPC_update:
3079     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3080     break;
3081   case OMPC_capture:
3082     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3083                              IsXLHSInRHSPart, Loc);
3084     break;
3085   case OMPC_if:
3086   case OMPC_final:
3087   case OMPC_num_threads:
3088   case OMPC_private:
3089   case OMPC_firstprivate:
3090   case OMPC_lastprivate:
3091   case OMPC_reduction:
3092   case OMPC_safelen:
3093   case OMPC_simdlen:
3094   case OMPC_collapse:
3095   case OMPC_default:
3096   case OMPC_seq_cst:
3097   case OMPC_shared:
3098   case OMPC_linear:
3099   case OMPC_aligned:
3100   case OMPC_copyin:
3101   case OMPC_copyprivate:
3102   case OMPC_flush:
3103   case OMPC_proc_bind:
3104   case OMPC_schedule:
3105   case OMPC_ordered:
3106   case OMPC_nowait:
3107   case OMPC_untied:
3108   case OMPC_threadprivate:
3109   case OMPC_depend:
3110   case OMPC_mergeable:
3111   case OMPC_device:
3112   case OMPC_threads:
3113   case OMPC_simd:
3114   case OMPC_map:
3115   case OMPC_num_teams:
3116   case OMPC_thread_limit:
3117   case OMPC_priority:
3118   case OMPC_grainsize:
3119   case OMPC_nogroup:
3120   case OMPC_num_tasks:
3121   case OMPC_hint:
3122   case OMPC_dist_schedule:
3123   case OMPC_defaultmap:
3124   case OMPC_uniform:
3125     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3126   }
3127 }
3128 
3129 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3130   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3131   OpenMPClauseKind Kind = OMPC_unknown;
3132   for (auto *C : S.clauses()) {
3133     // Find first clause (skip seq_cst clause, if it is first).
3134     if (C->getClauseKind() != OMPC_seq_cst) {
3135       Kind = C->getClauseKind();
3136       break;
3137     }
3138   }
3139 
3140   const auto *CS =
3141       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3142   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3143     enterFullExpression(EWC);
3144   }
3145   // Processing for statements under 'atomic capture'.
3146   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3147     for (const auto *C : Compound->body()) {
3148       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3149         enterFullExpression(EWC);
3150       }
3151     }
3152   }
3153 
3154   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3155                                             PrePostActionTy &) {
3156     CGF.EmitStopPoint(CS);
3157     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3158                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3159                       S.isXLHSInRHSPart(), S.getLocStart());
3160   };
3161   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3162   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3163 }
3164 
3165 std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
3166 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
3167     CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
3168     bool IsOffloadEntry) {
3169   llvm::Function *OutlinedFn = nullptr;
3170   llvm::Constant *OutlinedFnID = nullptr;
3171   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3172     OMPPrivateScope PrivateScope(CGF);
3173     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3174     CGF.EmitOMPPrivateClause(S, PrivateScope);
3175     (void)PrivateScope.Privatize();
3176 
3177     Action.Enter(CGF);
3178     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3179   };
3180   // Emit target region as a standalone region.
3181   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3182       S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
3183   return std::make_pair(OutlinedFn, OutlinedFnID);
3184 }
3185 
3186 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3187   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3188 
3189   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3190   GenerateOpenMPCapturedVars(CS, CapturedVars);
3191 
3192   llvm::Function *Fn = nullptr;
3193   llvm::Constant *FnID = nullptr;
3194 
3195   // Check if we have any if clause associated with the directive.
3196   const Expr *IfCond = nullptr;
3197 
3198   if (auto *C = S.getSingleClause<OMPIfClause>()) {
3199     IfCond = C->getCondition();
3200   }
3201 
3202   // Check if we have any device clause associated with the directive.
3203   const Expr *Device = nullptr;
3204   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3205     Device = C->getDevice();
3206   }
3207 
3208   // Check if we have an if clause whose conditional always evaluates to false
3209   // or if we do not have any targets specified. If so the target region is not
3210   // an offload entry point.
3211   bool IsOffloadEntry = true;
3212   if (IfCond) {
3213     bool Val;
3214     if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3215       IsOffloadEntry = false;
3216   }
3217   if (CGM.getLangOpts().OMPTargetTriples.empty())
3218     IsOffloadEntry = false;
3219 
3220   assert(CurFuncDecl && "No parent declaration for target region!");
3221   StringRef ParentName;
3222   // In case we have Ctors/Dtors we use the complete type variant to produce
3223   // the mangling of the device outlined kernel.
3224   if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
3225     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3226   else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
3227     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3228   else
3229     ParentName =
3230         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
3231 
3232   std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
3233       CGM, S, ParentName, IsOffloadEntry);
3234   OMPLexicalScope Scope(*this, S);
3235   CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
3236                                         CapturedVars);
3237 }
3238 
3239 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3240                                         const OMPExecutableDirective &S,
3241                                         OpenMPDirectiveKind InnermostKind,
3242                                         const RegionCodeGenTy &CodeGen) {
3243   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3244   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
3245       emitParallelOrTeamsOutlinedFunction(S,
3246           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3247 
3248   const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
3249   const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
3250   const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
3251   if (NT || TL) {
3252     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3253     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3254 
3255     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3256                                                   S.getLocStart());
3257   }
3258 
3259   OMPLexicalScope Scope(CGF, S);
3260   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3261   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3262   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3263                                            CapturedVars);
3264 }
3265 
3266 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3267   // Emit parallel region as a standalone region.
3268   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3269     OMPPrivateScope PrivateScope(CGF);
3270     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3271     CGF.EmitOMPPrivateClause(S, PrivateScope);
3272     (void)PrivateScope.Privatize();
3273     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3274   };
3275   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3276 }
3277 
3278 void CodeGenFunction::EmitOMPCancellationPointDirective(
3279     const OMPCancellationPointDirective &S) {
3280   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3281                                                    S.getCancelRegion());
3282 }
3283 
3284 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3285   const Expr *IfCond = nullptr;
3286   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3287     if (C->getNameModifier() == OMPD_unknown ||
3288         C->getNameModifier() == OMPD_cancel) {
3289       IfCond = C->getCondition();
3290       break;
3291     }
3292   }
3293   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3294                                         S.getCancelRegion());
3295 }
3296 
3297 CodeGenFunction::JumpDest
3298 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3299   if (Kind == OMPD_parallel || Kind == OMPD_task)
3300     return ReturnBlock;
3301   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3302          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
3303   return BreakContinueStack.back().BreakBlock;
3304 }
3305 
3306 // Generate the instructions for '#pragma omp target data' directive.
3307 void CodeGenFunction::EmitOMPTargetDataDirective(
3308     const OMPTargetDataDirective &S) {
3309   // The target data enclosed region is implemented just by emitting the
3310   // statement.
3311   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3312     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3313   };
3314 
3315   // If we don't have target devices, don't bother emitting the data mapping
3316   // code.
3317   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
3318     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3319 
3320     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data,
3321                                                 CodeGen);
3322     return;
3323   }
3324 
3325   // Check if we have any if clause associated with the directive.
3326   const Expr *IfCond = nullptr;
3327   if (auto *C = S.getSingleClause<OMPIfClause>())
3328     IfCond = C->getCondition();
3329 
3330   // Check if we have any device clause associated with the directive.
3331   const Expr *Device = nullptr;
3332   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3333     Device = C->getDevice();
3334 
3335   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen);
3336 }
3337 
3338 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
3339     const OMPTargetEnterDataDirective &S) {
3340   // If we don't have target devices, don't bother emitting the data mapping
3341   // code.
3342   if (CGM.getLangOpts().OMPTargetTriples.empty())
3343     return;
3344 
3345   // Check if we have any if clause associated with the directive.
3346   const Expr *IfCond = nullptr;
3347   if (auto *C = S.getSingleClause<OMPIfClause>())
3348     IfCond = C->getCondition();
3349 
3350   // Check if we have any device clause associated with the directive.
3351   const Expr *Device = nullptr;
3352   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3353     Device = C->getDevice();
3354 
3355   CGM.getOpenMPRuntime().emitTargetEnterOrExitDataCall(*this, S, IfCond,
3356                                                        Device);
3357 }
3358 
3359 void CodeGenFunction::EmitOMPTargetExitDataDirective(
3360     const OMPTargetExitDataDirective &S) {
3361   // If we don't have target devices, don't bother emitting the data mapping
3362   // code.
3363   if (CGM.getLangOpts().OMPTargetTriples.empty())
3364     return;
3365 
3366   // Check if we have any if clause associated with the directive.
3367   const Expr *IfCond = nullptr;
3368   if (auto *C = S.getSingleClause<OMPIfClause>())
3369     IfCond = C->getCondition();
3370 
3371   // Check if we have any device clause associated with the directive.
3372   const Expr *Device = nullptr;
3373   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3374     Device = C->getDevice();
3375 
3376   CGM.getOpenMPRuntime().emitTargetEnterOrExitDataCall(*this, S, IfCond,
3377                                                        Device);
3378 }
3379 
/// Placeholder for code generation of the 'target parallel' directive. The
/// body is intentionally empty: nothing is emitted for \p S yet.
void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  // TODO: codegen for target parallel.
}
3384 
/// Placeholder for code generation of the 'target parallel for' directive.
/// The body is intentionally empty: nothing is emitted for \p S yet.
void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}
3389 
3390 /// Emit a helper variable and return corresponding lvalue.
3391 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
3392                      const ImplicitParamDecl *PVD,
3393                      CodeGenFunction::OMPPrivateScope &Privates) {
3394   auto *VDecl = cast<VarDecl>(Helper->getDecl());
3395   Privates.addPrivate(
3396       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
3397 }
3398 
3399 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
3400   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
3401   // Emit outlined function for task construct.
3402   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3403   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3404   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3405   const Expr *IfCond = nullptr;
3406   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3407     if (C->getNameModifier() == OMPD_unknown ||
3408         C->getNameModifier() == OMPD_taskloop) {
3409       IfCond = C->getCondition();
3410       break;
3411     }
3412   }
3413 
3414   OMPTaskDataTy Data;
3415   // Check if taskloop must be emitted without taskgroup.
3416   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
3417   // TODO: Check if we should emit tied or untied task.
3418   Data.Tied = true;
3419   // Set scheduling for taskloop
3420   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
3421     // grainsize clause
3422     Data.Schedule.setInt(/*IntVal=*/false);
3423     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
3424   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
3425     // num_tasks clause
3426     Data.Schedule.setInt(/*IntVal=*/true);
3427     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
3428   }
3429 
3430   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
3431     // if (PreCond) {
3432     //   for (IV in 0..LastIteration) BODY;
3433     //   <Final counter/linear vars updates>;
3434     // }
3435     //
3436 
3437     // Emit: if (PreCond) - begin.
3438     // If the condition constant folds and can be elided, avoid emitting the
3439     // whole loop.
3440     bool CondConstant;
3441     llvm::BasicBlock *ContBlock = nullptr;
3442     OMPLoopScope PreInitScope(CGF, S);
3443     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3444       if (!CondConstant)
3445         return;
3446     } else {
3447       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
3448       ContBlock = CGF.createBasicBlock("taskloop.if.end");
3449       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
3450                   CGF.getProfileCount(&S));
3451       CGF.EmitBlock(ThenBlock);
3452       CGF.incrementProfileCounter(&S);
3453     }
3454 
3455     if (isOpenMPSimdDirective(S.getDirectiveKind()))
3456       CGF.EmitOMPSimdInit(S);
3457 
3458     OMPPrivateScope LoopScope(CGF);
3459     // Emit helper vars inits.
3460     enum { LowerBound = 5, UpperBound, Stride, LastIter };
3461     auto *I = CS->getCapturedDecl()->param_begin();
3462     auto *LBP = std::next(I, LowerBound);
3463     auto *UBP = std::next(I, UpperBound);
3464     auto *STP = std::next(I, Stride);
3465     auto *LIP = std::next(I, LastIter);
3466     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
3467              LoopScope);
3468     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
3469              LoopScope);
3470     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
3471     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
3472              LoopScope);
3473     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
3474     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3475     (void)LoopScope.Privatize();
3476     // Emit the loop iteration variable.
3477     const Expr *IVExpr = S.getIterationVariable();
3478     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
3479     CGF.EmitVarDecl(*IVDecl);
3480     CGF.EmitIgnoredExpr(S.getInit());
3481 
3482     // Emit the iterations count variable.
3483     // If it is not a variable, Sema decided to calculate iterations count on
3484     // each iteration (e.g., it is foldable into a constant).
3485     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3486       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3487       // Emit calculation of the iterations count.
3488       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
3489     }
3490 
3491     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
3492                          S.getInc(),
3493                          [&S](CodeGenFunction &CGF) {
3494                            CGF.EmitOMPLoopBody(S, JumpDest());
3495                            CGF.EmitStopPoint(&S);
3496                          },
3497                          [](CodeGenFunction &) {});
3498     // Emit: if (PreCond) - end.
3499     if (ContBlock) {
3500       CGF.EmitBranch(ContBlock);
3501       CGF.EmitBlock(ContBlock, true);
3502     }
3503     // Emit final copy of the lastprivate variables if IsLastIter != 0.
3504     if (HasLastprivateClause) {
3505       CGF.EmitOMPLastprivateClauseFinal(
3506           S, isOpenMPSimdDirective(S.getDirectiveKind()),
3507           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
3508               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
3509               (*LIP)->getType(), S.getLocStart())));
3510     }
3511   };
3512   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
3513                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
3514                             const OMPTaskDataTy &Data) {
3515     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
3516       OMPLoopScope PreInitScope(CGF, S);
3517       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
3518                                                   OutlinedFn, SharedsTy,
3519                                                   CapturedStruct, IfCond, Data);
3520     };
3521     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
3522                                                     CodeGen);
3523   };
3524   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
3525 }
3526 
3527 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
3528   EmitOMPTaskLoopBasedDirective(S);
3529 }
3530 
3531 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
3532     const OMPTaskLoopSimdDirective &S) {
3533   EmitOMPTaskLoopBasedDirective(S);
3534 }
3535