1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     emitPreInitStmt(CGF, S);
61     if (AsInlined) {
62       if (S.hasAssociatedStmt()) {
63         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
64         for (auto &C : CS->captures()) {
65           if (C.capturesVariable() || C.capturesVariableByCopy()) {
66             auto *VD = C.getCapturedVar();
67             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
68                             isCapturedVar(CGF, VD) ||
69                                 (CGF.CapturedStmtInfo &&
70                                  InlinedShareds.isGlobalVarCaptured(VD)),
71                             VD->getType().getNonReferenceType(), VK_LValue,
72                             SourceLocation());
73             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
74               return CGF.EmitLValue(&DRE).getAddress();
75             });
76           }
77         }
78         (void)InlinedShareds.Privatize();
79       }
80     }
81   }
82 };
83 
84 /// Private scope for OpenMP loop-based directives, that supports capturing
85 /// of used expression from loop statement.
86 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
87   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
88     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
89       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
90         for (const auto *I : PreInits->decls())
91           CGF.EmitVarDecl(cast<VarDecl>(*I));
92       }
93     }
94   }
95 
96 public:
97   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
98       : CodeGenFunction::RunCleanupsScope(CGF) {
99     emitPreInitStmt(CGF, S);
100   }
101 };
102 
103 } // namespace
104 
105 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
106   auto &C = getContext();
107   llvm::Value *Size = nullptr;
108   auto SizeInChars = C.getTypeSizeInChars(Ty);
109   if (SizeInChars.isZero()) {
110     // getTypeSizeInChars() returns 0 for a VLA.
111     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
112       llvm::Value *ArraySize;
113       std::tie(ArraySize, Ty) = getVLASize(VAT);
114       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
115     }
116     SizeInChars = C.getTypeSizeInChars(Ty);
117     if (SizeInChars.isZero())
118       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
119     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
120   } else
121     Size = CGM.getSize(SizeInChars);
122   return Size;
123 }
124 
125 void CodeGenFunction::GenerateOpenMPCapturedVars(
126     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
127   const RecordDecl *RD = S.getCapturedRecordDecl();
128   auto CurField = RD->field_begin();
129   auto CurCap = S.captures().begin();
130   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
131                                                  E = S.capture_init_end();
132        I != E; ++I, ++CurField, ++CurCap) {
133     if (CurField->hasCapturedVLAType()) {
134       auto VAT = CurField->getCapturedVLAType();
135       auto *Val = VLASizeMap[VAT->getSizeExpr()];
136       CapturedVars.push_back(Val);
137     } else if (CurCap->capturesThis())
138       CapturedVars.push_back(CXXThisValue);
139     else if (CurCap->capturesVariableByCopy()) {
140       llvm::Value *CV =
141           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
142 
143       // If the field is not a pointer, we need to save the actual value
144       // and load it as a void pointer.
145       if (!CurField->getType()->isAnyPointerType()) {
146         auto &Ctx = getContext();
147         auto DstAddr = CreateMemTemp(
148             Ctx.getUIntPtrType(),
149             Twine(CurCap->getCapturedVar()->getName()) + ".casted");
150         LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
151 
152         auto *SrcAddrVal = EmitScalarConversion(
153             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
154             Ctx.getPointerType(CurField->getType()), SourceLocation());
155         LValue SrcLV =
156             MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
157 
158         // Store the value using the source type pointer.
159         EmitStoreThroughLValue(RValue::get(CV), SrcLV);
160 
161         // Load the value using the destination type pointer.
162         CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
163       }
164       CapturedVars.push_back(CV);
165     } else {
166       assert(CurCap->capturesVariable() && "Expected capture by reference.");
167       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
168     }
169   }
170 }
171 
172 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
173                                     StringRef Name, LValue AddrLV,
174                                     bool isReferenceType = false) {
175   ASTContext &Ctx = CGF.getContext();
176 
177   auto *CastedPtr = CGF.EmitScalarConversion(
178       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
179       Ctx.getPointerType(DstType), SourceLocation());
180   auto TmpAddr =
181       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
182           .getAddress();
183 
184   // If we are dealing with references we need to return the address of the
185   // reference instead of the reference of the value.
186   if (isReferenceType) {
187     QualType RefType = Ctx.getLValueReferenceType(DstType);
188     auto *RefVal = TmpAddr.getPointer();
189     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
190     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
191     CGF.EmitScalarInit(RefVal, TmpLVal);
192   }
193 
194   return TmpAddr;
195 }
196 
197 llvm::Function *
198 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
199   assert(
200       CapturedStmtInfo &&
201       "CapturedStmtInfo should be set when generating the captured function");
202   const CapturedDecl *CD = S.getCapturedDecl();
203   const RecordDecl *RD = S.getCapturedRecordDecl();
204   assert(CD->hasBody() && "missing CapturedDecl body");
205 
206   // Build the argument list.
207   ASTContext &Ctx = CGM.getContext();
208   FunctionArgList Args;
209   Args.append(CD->param_begin(),
210               std::next(CD->param_begin(), CD->getContextParamPosition()));
211   auto I = S.captures().begin();
212   for (auto *FD : RD->fields()) {
213     QualType ArgType = FD->getType();
214     IdentifierInfo *II = nullptr;
215     VarDecl *CapVar = nullptr;
216 
217     // If this is a capture by copy and the type is not a pointer, the outlined
218     // function argument type should be uintptr and the value properly casted to
219     // uintptr. This is necessary given that the runtime library is only able to
220     // deal with pointers. We can pass in the same way the VLA type sizes to the
221     // outlined function.
222     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
223         I->capturesVariableArrayType())
224       ArgType = Ctx.getUIntPtrType();
225 
226     if (I->capturesVariable() || I->capturesVariableByCopy()) {
227       CapVar = I->getCapturedVar();
228       II = CapVar->getIdentifier();
229     } else if (I->capturesThis())
230       II = &getContext().Idents.get("this");
231     else {
232       assert(I->capturesVariableArrayType());
233       II = &getContext().Idents.get("vla");
234     }
235     if (ArgType->isVariablyModifiedType())
236       ArgType = getContext().getVariableArrayDecayedType(ArgType);
237     Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
238                                              FD->getLocation(), II, ArgType));
239     ++I;
240   }
241   Args.append(
242       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
243       CD->param_end());
244 
245   // Create the function declaration.
246   FunctionType::ExtInfo ExtInfo;
247   const CGFunctionInfo &FuncInfo =
248       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
249   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
250 
251   llvm::Function *F = llvm::Function::Create(
252       FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
253       CapturedStmtInfo->getHelperName(), &CGM.getModule());
254   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
255   if (CD->isNothrow())
256     F->addFnAttr(llvm::Attribute::NoUnwind);
257 
258   // Generate the function.
259   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
260                 CD->getBody()->getLocStart());
261   unsigned Cnt = CD->getContextParamPosition();
262   I = S.captures().begin();
263   for (auto *FD : RD->fields()) {
264     // If we are capturing a pointer by copy we don't need to do anything, just
265     // use the value that we get from the arguments.
266     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
267       const VarDecl *CurVD = I->getCapturedVar();
268       Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
269       // If the variable is a reference we need to materialize it here.
270       if (CurVD->getType()->isReferenceType()) {
271         Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
272                                         ".materialized_ref");
273         EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
274                           CurVD->getType());
275         LocalAddr = RefAddr;
276       }
277       setAddrOfLocalVar(CurVD, LocalAddr);
278       ++Cnt;
279       ++I;
280       continue;
281     }
282 
283     LValue ArgLVal =
284         MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
285                        AlignmentSource::Decl);
286     if (FD->hasCapturedVLAType()) {
287       LValue CastedArgLVal =
288           MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
289                                               Args[Cnt]->getName(), ArgLVal),
290                          FD->getType(), AlignmentSource::Decl);
291       auto *ExprArg =
292           EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
293       auto VAT = FD->getCapturedVLAType();
294       VLASizeMap[VAT->getSizeExpr()] = ExprArg;
295     } else if (I->capturesVariable()) {
296       auto *Var = I->getCapturedVar();
297       QualType VarTy = Var->getType();
298       Address ArgAddr = ArgLVal.getAddress();
299       if (!VarTy->isReferenceType()) {
300         ArgAddr = EmitLoadOfReference(
301             ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
302       }
303       setAddrOfLocalVar(
304           Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
305     } else if (I->capturesVariableByCopy()) {
306       assert(!FD->getType()->isAnyPointerType() &&
307              "Not expecting a captured pointer.");
308       auto *Var = I->getCapturedVar();
309       QualType VarTy = Var->getType();
310       setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
311                                                   Args[Cnt]->getName(), ArgLVal,
312                                                   VarTy->isReferenceType()));
313     } else {
314       // If 'this' is captured, load it into CXXThisValue.
315       assert(I->capturesThis());
316       CXXThisValue =
317           EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
318     }
319     ++Cnt;
320     ++I;
321   }
322 
323   PGO.assignRegionCounters(GlobalDecl(CD), F);
324   CapturedStmtInfo->EmitBody(*this, CD->getBody());
325   FinishFunction(CD->getBodyRBrace());
326 
327   return F;
328 }
329 
330 //===----------------------------------------------------------------------===//
331 //                              OpenMP Directive Emission
332 //===----------------------------------------------------------------------===//
333 void CodeGenFunction::EmitOMPAggregateAssign(
334     Address DestAddr, Address SrcAddr, QualType OriginalType,
335     const llvm::function_ref<void(Address, Address)> &CopyGen) {
336   // Perform element-by-element initialization.
337   QualType ElementTy;
338 
339   // Drill down to the base element type on both arrays.
340   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
341   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
342   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
343 
344   auto SrcBegin = SrcAddr.getPointer();
345   auto DestBegin = DestAddr.getPointer();
346   // Cast from pointer to array type to pointer to single element.
347   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
348   // The basic structure here is a while-do loop.
349   auto BodyBB = createBasicBlock("omp.arraycpy.body");
350   auto DoneBB = createBasicBlock("omp.arraycpy.done");
351   auto IsEmpty =
352       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
353   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
354 
355   // Enter the loop body, making that address the current address.
356   auto EntryBB = Builder.GetInsertBlock();
357   EmitBlock(BodyBB);
358 
359   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
360 
361   llvm::PHINode *SrcElementPHI =
362     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
363   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
364   Address SrcElementCurrent =
365       Address(SrcElementPHI,
366               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
367 
368   llvm::PHINode *DestElementPHI =
369     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
370   DestElementPHI->addIncoming(DestBegin, EntryBB);
371   Address DestElementCurrent =
372     Address(DestElementPHI,
373             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
374 
375   // Emit copy.
376   CopyGen(DestElementCurrent, SrcElementCurrent);
377 
378   // Shift the address forward by one element.
379   auto DestElementNext = Builder.CreateConstGEP1_32(
380       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
381   auto SrcElementNext = Builder.CreateConstGEP1_32(
382       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
383   // Check whether we've reached the end.
384   auto Done =
385       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
386   Builder.CreateCondBr(Done, DoneBB, BodyBB);
387   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
388   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
389 
390   // Done.
391   EmitBlock(DoneBB, /*IsFinished=*/true);
392 }
393 
394 /// Check if the combiner is a call to UDR combiner and if it is so return the
395 /// UDR decl used for reduction.
396 static const OMPDeclareReductionDecl *
397 getReductionInit(const Expr *ReductionOp) {
398   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
399     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
400       if (auto *DRE =
401               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
402         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
403           return DRD;
404   return nullptr;
405 }
406 
407 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
408                                              const OMPDeclareReductionDecl *DRD,
409                                              const Expr *InitOp,
410                                              Address Private, Address Original,
411                                              QualType Ty) {
412   if (DRD->getInitializer()) {
413     std::pair<llvm::Function *, llvm::Function *> Reduction =
414         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
415     auto *CE = cast<CallExpr>(InitOp);
416     auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
417     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
418     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
419     auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
420     auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
421     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
422     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
423                             [=]() -> Address { return Private; });
424     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
425                             [=]() -> Address { return Original; });
426     (void)PrivateScope.Privatize();
427     RValue Func = RValue::get(Reduction.second);
428     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
429     CGF.EmitIgnoredExpr(InitOp);
430   } else {
431     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
432     auto *GV = new llvm::GlobalVariable(
433         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
434         llvm::GlobalValue::PrivateLinkage, Init, ".init");
435     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
436     RValue InitRVal;
437     switch (CGF.getEvaluationKind(Ty)) {
438     case TEK_Scalar:
439       InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
440       break;
441     case TEK_Complex:
442       InitRVal =
443           RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
444       break;
445     case TEK_Aggregate:
446       InitRVal = RValue::getAggregate(LV.getAddress());
447       break;
448     }
449     OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
450     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
451     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
452                          /*IsInitializer=*/false);
453   }
454 }
455 
/// \brief Emit element-by-element initialization of an array.
///
/// Emits a loop over the elements of the destination array. Each element is
/// initialized either through emitInitWithReductionInitializer() (when \p Init
/// resolves to a user-defined reduction with an initializer) or by emitting
/// \p Init into the element. The source array is only walked when a
/// user-defined reduction is involved.
/// \param CGF Code generation state used to emit the loop.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init,
                                 Address SrcAddr = Address::invalid()) {
  // Non-null only when Init is a call to a user-defined reduction.
  // NOTE(review): getReductionInit() applies dyn_cast to Init, while the
  // condition further down tests !Init — confirm whether Init can be null here.
  auto *DRD = getReductionInit(Init);
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): DestAddr is bitcast to its own element type, which looks
  // like a no-op — confirm intent.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely when the destination range is empty.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // The source PHI exists only for user-defined reductions; otherwise the
  // source element address stays invalid.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run at the end of
    // this nested scope, before the pointers are advanced.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (DRD && (DRD->getInitializer() || !Init)) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source GEP is named "omp.arraycpy.dest.element", the
    // same as the destination GEP below — possibly a copy-paste leftover.
    auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
543 
544 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
545                                   Address SrcAddr, const VarDecl *DestVD,
546                                   const VarDecl *SrcVD, const Expr *Copy) {
547   if (OriginalType->isArrayType()) {
548     auto *BO = dyn_cast<BinaryOperator>(Copy);
549     if (BO && BO->getOpcode() == BO_Assign) {
550       // Perform simple memcpy for simple copying.
551       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
552     } else {
553       // For arrays with complex element types perform element by element
554       // copying.
555       EmitOMPAggregateAssign(
556           DestAddr, SrcAddr, OriginalType,
557           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
558             // Working with the single array element, so have to remap
559             // destination and source variables to corresponding array
560             // elements.
561             CodeGenFunction::OMPPrivateScope Remap(*this);
562             Remap.addPrivate(DestVD, [DestElement]() -> Address {
563               return DestElement;
564             });
565             Remap.addPrivate(
566                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
567             (void)Remap.Privatize();
568             EmitIgnoredExpr(Copy);
569           });
570     }
571   } else {
572     // Remap pseudo source variable to private copy.
573     CodeGenFunction::OMPPrivateScope Remap(*this);
574     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
575     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
576     (void)Remap.Privatize();
577     // Emit copying of the whole variable.
578     EmitIgnoredExpr(Copy);
579   }
580 }
581 
/// Registers private copies for the variables listed in the firstprivate
/// clauses of \p D in \p PrivateScope, each initialized from the original
/// variable.
///
/// A variable is skipped (but still recorded as handled) when it is not also
/// lastprivate and both the current captured-statement info and the
/// directive's own captured statement map it to the same non-reference field.
///
/// \returns false if there is no insert point; otherwise true only if some
/// firstprivate variable that was not skipped is also lastprivate and at least
/// one variable was recorded as handled.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect the canonical declarations of all lastprivate variables.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    // NOTE(review): the loop variable D shadows the directive parameter D.
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  // Canonical declarations already handled, so that a variable listed more
  // than once is only processed the first time.
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // IRef and InitsRef advance in lockstep with the private_copies() range.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      // NOTE(review): CapturedStmtInfo is dereferenced without a null check —
      // presumably always set when this is called; confirm against callers.
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // No private copy is emitted when the variable is not lastprivate and
      // is captured through the same non-reference field in both infos.
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        // VD is the private copy; VDInit is the helper variable that VD's
        // initializer refers to as its source.
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Emit a reference to the original variable to obtain its address.
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              // Otherwise run the initializer once per element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    // Drop the temporary mapping of the helper variable.
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            // Drop the temporary mapping of the helper variable.
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
674 
675 void CodeGenFunction::EmitOMPPrivateClause(
676     const OMPExecutableDirective &D,
677     CodeGenFunction::OMPPrivateScope &PrivateScope) {
678   if (!HaveInsertPoint())
679     return;
680   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
681   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
682     auto IRef = C->varlist_begin();
683     for (auto IInit : C->private_copies()) {
684       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
685       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
686         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
687         bool IsRegistered =
688             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
689               // Emit private VarDecl with copy init.
690               EmitDecl(*VD);
691               return GetAddrOfLocalVar(VD);
692             });
693         assert(IsRegistered && "private var already registered as private");
694         // Silence the warning about unused variable.
695         (void)IsRegistered;
696       }
697       ++IRef;
698     }
699   }
700 }
701 
702 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
703   if (!HaveInsertPoint())
704     return false;
705   // threadprivate_var1 = master_threadprivate_var1;
706   // operator=(threadprivate_var2, master_threadprivate_var2);
707   // ...
708   // __kmpc_barrier(&loc, global_tid);
709   llvm::DenseSet<const VarDecl *> CopiedVars;
710   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
711   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
712     auto IRef = C->varlist_begin();
713     auto ISrcRef = C->source_exprs().begin();
714     auto IDestRef = C->destination_exprs().begin();
715     for (auto *AssignOp : C->assignment_ops()) {
716       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
717       QualType Type = VD->getType();
718       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
719         // Get the address of the master variable. If we are emitting code with
720         // TLS support, the address is passed from the master as field in the
721         // captured declaration.
722         Address MasterAddr = Address::invalid();
723         if (getLangOpts().OpenMPUseTLS &&
724             getContext().getTargetInfo().isTLSSupported()) {
725           assert(CapturedStmtInfo->lookup(VD) &&
726                  "Copyin threadprivates should have been captured!");
727           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
728                           VK_LValue, (*IRef)->getExprLoc());
729           MasterAddr = EmitLValue(&DRE).getAddress();
730           LocalDeclMap.erase(VD);
731         } else {
732           MasterAddr =
733             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
734                                         : CGM.GetAddrOfGlobal(VD),
735                     getContext().getDeclAlign(VD));
736         }
737         // Get the address of the threadprivate variable.
738         Address PrivateAddr = EmitLValue(*IRef).getAddress();
739         if (CopiedVars.size() == 1) {
740           // At first check if current thread is a master thread. If it is, no
741           // need to copy data.
742           CopyBegin = createBasicBlock("copyin.not.master");
743           CopyEnd = createBasicBlock("copyin.not.master.end");
744           Builder.CreateCondBr(
745               Builder.CreateICmpNE(
746                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
747                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
748               CopyBegin, CopyEnd);
749           EmitBlock(CopyBegin);
750         }
751         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
752         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
753         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
754       }
755       ++IRef;
756       ++ISrcRef;
757       ++IDestRef;
758     }
759   }
760   if (CopyEnd) {
761     // Exit out of copying procedure for non-master thread.
762     EmitBlock(CopyEnd, /*IsFinished=*/true);
763     return true;
764   }
765   return false;
766 }
767 
768 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
769     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
770   if (!HaveInsertPoint())
771     return false;
772   bool HasAtLeastOneLastprivate = false;
773   llvm::DenseSet<const VarDecl *> SIMDLCVs;
774   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
775     auto *LoopDirective = cast<OMPLoopDirective>(&D);
776     for (auto *C : LoopDirective->counters()) {
777       SIMDLCVs.insert(
778           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
779     }
780   }
781   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
782   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
783     HasAtLeastOneLastprivate = true;
784     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
785       break;
786     auto IRef = C->varlist_begin();
787     auto IDestRef = C->destination_exprs().begin();
788     for (auto *IInit : C->private_copies()) {
789       // Keep the address of the original variable for future update at the end
790       // of the loop.
791       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
792       // Taskloops do not require additional initialization, it is done in
793       // runtime support library.
794       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
795         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
796         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
797           DeclRefExpr DRE(
798               const_cast<VarDecl *>(OrigVD),
799               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
800                   OrigVD) != nullptr,
801               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
802           return EmitLValue(&DRE).getAddress();
803         });
804         // Check if the variable is also a firstprivate: in this case IInit is
805         // not generated. Initialization of this variable will happen in codegen
806         // for 'firstprivate' clause.
807         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
808           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
809           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
810             // Emit private VarDecl with copy init.
811             EmitDecl(*VD);
812             return GetAddrOfLocalVar(VD);
813           });
814           assert(IsRegistered &&
815                  "lastprivate var already registered as private");
816           (void)IsRegistered;
817         }
818       }
819       ++IRef;
820       ++IDestRef;
821     }
822   }
823   return HasAtLeastOneLastprivate;
824 }
825 
826 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
827     const OMPExecutableDirective &D, bool NoFinals,
828     llvm::Value *IsLastIterCond) {
829   if (!HaveInsertPoint())
830     return;
831   // Emit following code:
832   // if (<IsLastIterCond>) {
833   //   orig_var1 = private_orig_var1;
834   //   ...
835   //   orig_varn = private_orig_varn;
836   // }
837   llvm::BasicBlock *ThenBB = nullptr;
838   llvm::BasicBlock *DoneBB = nullptr;
839   if (IsLastIterCond) {
840     ThenBB = createBasicBlock(".omp.lastprivate.then");
841     DoneBB = createBasicBlock(".omp.lastprivate.done");
842     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
843     EmitBlock(ThenBB);
844   }
845   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
846   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
847   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
848     auto IC = LoopDirective->counters().begin();
849     for (auto F : LoopDirective->finals()) {
850       auto *D =
851           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
852       if (NoFinals)
853         AlreadyEmittedVars.insert(D);
854       else
855         LoopCountersAndUpdates[D] = F;
856       ++IC;
857     }
858   }
859   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
860     auto IRef = C->varlist_begin();
861     auto ISrcRef = C->source_exprs().begin();
862     auto IDestRef = C->destination_exprs().begin();
863     for (auto *AssignOp : C->assignment_ops()) {
864       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
865       QualType Type = PrivateVD->getType();
866       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
867       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
868         // If lastprivate variable is a loop control variable for loop-based
869         // directive, update its value before copyin back to original
870         // variable.
871         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
872           EmitIgnoredExpr(FinalExpr);
873         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
874         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
875         // Get the address of the original variable.
876         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
877         // Get the address of the private variable.
878         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
879         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
880           PrivateAddr =
881               Address(Builder.CreateLoad(PrivateAddr),
882                       getNaturalTypeAlignment(RefTy->getPointeeType()));
883         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
884       }
885       ++IRef;
886       ++ISrcRef;
887       ++IDestRef;
888     }
889     if (auto *PostUpdate = C->getPostUpdateExpr())
890       EmitIgnoredExpr(PostUpdate);
891   }
892   if (IsLastIterCond)
893     EmitBlock(DoneBB, /*IsFinished=*/true);
894 }
895 
896 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
897                           LValue BaseLV, llvm::Value *Addr) {
898   Address Tmp = Address::invalid();
899   Address TopTmp = Address::invalid();
900   Address MostTopTmp = Address::invalid();
901   BaseTy = BaseTy.getNonReferenceType();
902   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
903          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
904     Tmp = CGF.CreateMemTemp(BaseTy);
905     if (TopTmp.isValid())
906       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
907     else
908       MostTopTmp = Tmp;
909     TopTmp = Tmp;
910     BaseTy = BaseTy->getPointeeType();
911   }
912   llvm::Type *Ty = BaseLV.getPointer()->getType();
913   if (Tmp.isValid())
914     Ty = Tmp.getElementType();
915   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
916   if (Tmp.isValid()) {
917     CGF.Builder.CreateStore(Addr, Tmp);
918     return MostTopTmp;
919   }
920   return Address(Addr, BaseLV.getAlignment());
921 }
922 
923 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
924                           LValue BaseLV) {
925   BaseTy = BaseTy.getNonReferenceType();
926   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
927          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
928     if (auto *PtrTy = BaseTy->getAs<PointerType>())
929       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
930     else {
931       BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
932                                              BaseTy->castAs<ReferenceType>());
933     }
934     BaseTy = BaseTy->getPointeeType();
935   }
936   return CGF.MakeAddrLValue(
937       Address(
938           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
939               BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
940           BaseLV.getAlignment()),
941       BaseLV.getType(), BaseLV.getAlignmentSource());
942 }
943 
944 void CodeGenFunction::EmitOMPReductionClauseInit(
945     const OMPExecutableDirective &D,
946     CodeGenFunction::OMPPrivateScope &PrivateScope) {
947   if (!HaveInsertPoint())
948     return;
949   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
950     auto ILHS = C->lhs_exprs().begin();
951     auto IRHS = C->rhs_exprs().begin();
952     auto IPriv = C->privates().begin();
953     auto IRed = C->reduction_ops().begin();
954     for (auto IRef : C->varlists()) {
955       auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
956       auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
957       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
958       auto *DRD = getReductionInit(*IRed);
959       if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
960         auto *Base = OASE->getBase()->IgnoreParenImpCasts();
961         while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
962           Base = TempOASE->getBase()->IgnoreParenImpCasts();
963         while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
964           Base = TempASE->getBase()->IgnoreParenImpCasts();
965         auto *DE = cast<DeclRefExpr>(Base);
966         auto *OrigVD = cast<VarDecl>(DE->getDecl());
967         auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
968         auto OASELValueUB =
969             EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
970         auto OriginalBaseLValue = EmitLValue(DE);
971         LValue BaseLValue =
972             loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
973                         OriginalBaseLValue);
974         // Store the address of the original variable associated with the LHS
975         // implicit variable.
976         PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
977           return OASELValueLB.getAddress();
978         });
979         // Emit reduction copy.
980         bool IsRegistered = PrivateScope.addPrivate(
981             OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
982                      OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
983               // Emit VarDecl with copy init for arrays.
984               // Get the address of the original variable captured in current
985               // captured region.
986               auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
987                                                  OASELValueLB.getPointer());
988               Size = Builder.CreateNUWAdd(
989                   Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
990               CodeGenFunction::OpaqueValueMapping OpaqueMap(
991                   *this, cast<OpaqueValueExpr>(
992                              getContext()
993                                  .getAsVariableArrayType(PrivateVD->getType())
994                                  ->getSizeExpr()),
995                   RValue::get(Size));
996               EmitVariablyModifiedType(PrivateVD->getType());
997               auto Emission = EmitAutoVarAlloca(*PrivateVD);
998               auto Addr = Emission.getAllocatedAddress();
999               auto *Init = PrivateVD->getInit();
1000               EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
1001                                    DRD ? *IRed : Init,
1002                                    OASELValueLB.getAddress());
1003               EmitAutoVarCleanups(Emission);
1004               // Emit private VarDecl with reduction init.
1005               auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
1006                                                    OASELValueLB.getPointer());
1007               auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
1008               return castToBase(*this, OrigVD->getType(),
1009                                 OASELValueLB.getType(), OriginalBaseLValue,
1010                                 Ptr);
1011             });
1012         assert(IsRegistered && "private var already registered as private");
1013         // Silence the warning about unused variable.
1014         (void)IsRegistered;
1015         PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1016           return GetAddrOfLocalVar(PrivateVD);
1017         });
1018       } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
1019         auto *Base = ASE->getBase()->IgnoreParenImpCasts();
1020         while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1021           Base = TempASE->getBase()->IgnoreParenImpCasts();
1022         auto *DE = cast<DeclRefExpr>(Base);
1023         auto *OrigVD = cast<VarDecl>(DE->getDecl());
1024         auto ASELValue = EmitLValue(ASE);
1025         auto OriginalBaseLValue = EmitLValue(DE);
1026         LValue BaseLValue = loadToBegin(
1027             *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
1028         // Store the address of the original variable associated with the LHS
1029         // implicit variable.
1030         PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
1031           return ASELValue.getAddress();
1032         });
1033         // Emit reduction copy.
1034         bool IsRegistered = PrivateScope.addPrivate(
1035             OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
1036                      OriginalBaseLValue, DRD, IRed]() -> Address {
1037               // Emit private VarDecl with reduction init.
1038               AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1039               auto Addr = Emission.getAllocatedAddress();
1040               if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1041                 emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
1042                                                  ASELValue.getAddress(),
1043                                                  ASELValue.getType());
1044               } else
1045                 EmitAutoVarInit(Emission);
1046               EmitAutoVarCleanups(Emission);
1047               auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
1048                                                    ASELValue.getPointer());
1049               auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
1050               return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
1051                                 OriginalBaseLValue, Ptr);
1052             });
1053         assert(IsRegistered && "private var already registered as private");
1054         // Silence the warning about unused variable.
1055         (void)IsRegistered;
1056         PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1057           return Builder.CreateElementBitCast(
1058               GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
1059               "rhs.begin");
1060         });
1061       } else {
1062         auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
1063         QualType Type = PrivateVD->getType();
1064         if (getContext().getAsArrayType(Type)) {
1065           // Store the address of the original variable associated with the LHS
1066           // implicit variable.
1067           DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1068                           CapturedStmtInfo->lookup(OrigVD) != nullptr,
1069                           IRef->getType(), VK_LValue, IRef->getExprLoc());
1070           Address OriginalAddr = EmitLValue(&DRE).getAddress();
1071           PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
1072                                           LHSVD]() -> Address {
1073             OriginalAddr = Builder.CreateElementBitCast(
1074                 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1075             return OriginalAddr;
1076           });
1077           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
1078             if (Type->isVariablyModifiedType()) {
1079               CodeGenFunction::OpaqueValueMapping OpaqueMap(
1080                   *this, cast<OpaqueValueExpr>(
1081                              getContext()
1082                                  .getAsVariableArrayType(PrivateVD->getType())
1083                                  ->getSizeExpr()),
1084                   RValue::get(
1085                       getTypeSize(OrigVD->getType().getNonReferenceType())));
1086               EmitVariablyModifiedType(Type);
1087             }
1088             auto Emission = EmitAutoVarAlloca(*PrivateVD);
1089             auto Addr = Emission.getAllocatedAddress();
1090             auto *Init = PrivateVD->getInit();
1091             EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
1092                                  DRD ? *IRed : Init, OriginalAddr);
1093             EmitAutoVarCleanups(Emission);
1094             return Emission.getAllocatedAddress();
1095           });
1096           assert(IsRegistered && "private var already registered as private");
1097           // Silence the warning about unused variable.
1098           (void)IsRegistered;
1099           PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1100             return Builder.CreateElementBitCast(
1101                 GetAddrOfLocalVar(PrivateVD),
1102                 ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
1103           });
1104         } else {
1105           // Store the address of the original variable associated with the LHS
1106           // implicit variable.
1107           Address OriginalAddr = Address::invalid();
1108           PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
1109                                           &OriginalAddr]() -> Address {
1110             DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1111                             CapturedStmtInfo->lookup(OrigVD) != nullptr,
1112                             IRef->getType(), VK_LValue, IRef->getExprLoc());
1113             OriginalAddr = EmitLValue(&DRE).getAddress();
1114             return OriginalAddr;
1115           });
1116           // Emit reduction copy.
1117           bool IsRegistered = PrivateScope.addPrivate(
1118               OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
1119                 // Emit private VarDecl with reduction init.
1120                 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1121                 auto Addr = Emission.getAllocatedAddress();
1122                 if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1123                   emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
1124                                                    OriginalAddr,
1125                                                    PrivateVD->getType());
1126                 } else
1127                   EmitAutoVarInit(Emission);
1128                 EmitAutoVarCleanups(Emission);
1129                 return Addr;
1130               });
1131           assert(IsRegistered && "private var already registered as private");
1132           // Silence the warning about unused variable.
1133           (void)IsRegistered;
1134           PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1135             return GetAddrOfLocalVar(PrivateVD);
1136           });
1137         }
1138       }
1139       ++ILHS;
1140       ++IRHS;
1141       ++IPriv;
1142       ++IRed;
1143     }
1144   }
1145 }
1146 
1147 void CodeGenFunction::EmitOMPReductionClauseFinal(
1148     const OMPExecutableDirective &D) {
1149   if (!HaveInsertPoint())
1150     return;
1151   llvm::SmallVector<const Expr *, 8> Privates;
1152   llvm::SmallVector<const Expr *, 8> LHSExprs;
1153   llvm::SmallVector<const Expr *, 8> RHSExprs;
1154   llvm::SmallVector<const Expr *, 8> ReductionOps;
1155   bool HasAtLeastOneReduction = false;
1156   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1157     HasAtLeastOneReduction = true;
1158     Privates.append(C->privates().begin(), C->privates().end());
1159     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1160     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1161     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1162   }
1163   if (HasAtLeastOneReduction) {
1164     // Emit nowait reduction if nowait clause is present or directive is a
1165     // parallel directive (it always has implicit barrier).
1166     CGM.getOpenMPRuntime().emitReduction(
1167         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1168         D.getSingleClause<OMPNowaitClause>() ||
1169             isOpenMPParallelDirective(D.getDirectiveKind()) ||
1170             D.getDirectiveKind() == OMPD_simd,
1171         D.getDirectiveKind() == OMPD_simd);
1172   }
1173 }
1174 
1175 static void emitPostUpdateForReductionClause(
1176     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1177     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1178   if (!CGF.HaveInsertPoint())
1179     return;
1180   llvm::BasicBlock *DoneBB = nullptr;
1181   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1182     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1183       if (!DoneBB) {
1184         if (auto *Cond = CondGen(CGF)) {
1185           // If the first post-update expression is found, emit conditional
1186           // block if it was requested.
1187           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1188           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1189           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1190           CGF.EmitBlock(ThenBB);
1191         }
1192       }
1193       CGF.EmitIgnoredExpr(PostUpdate);
1194     }
1195   }
1196   if (DoneBB)
1197     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1198 }
1199 
1200 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
1201                                            const OMPExecutableDirective &S,
1202                                            OpenMPDirectiveKind InnermostKind,
1203                                            const RegionCodeGenTy &CodeGen) {
1204   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
1205   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
1206       emitParallelOrTeamsOutlinedFunction(S,
1207           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1208   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1209     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1210     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1211                                          /*IgnoreResultAssign*/ true);
1212     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1213         CGF, NumThreads, NumThreadsClause->getLocStart());
1214   }
1215   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1216     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1217     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1218         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1219   }
1220   const Expr *IfCond = nullptr;
1221   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1222     if (C->getNameModifier() == OMPD_unknown ||
1223         C->getNameModifier() == OMPD_parallel) {
1224       IfCond = C->getCondition();
1225       break;
1226     }
1227   }
1228 
1229   OMPLexicalScope Scope(CGF, S);
1230   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1231   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1232   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1233                                               CapturedVars, IfCond);
1234 }
1235 
1236 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1237   // Emit parallel region as a standalone region.
1238   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1239     OMPPrivateScope PrivateScope(CGF);
1240     bool Copyins = CGF.EmitOMPCopyinClause(S);
1241     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1242     if (Copyins) {
1243       // Emit implicit barrier to synchronize threads and avoid data races on
1244       // propagation master's thread values of threadprivate variables to local
1245       // instances of that variables of all other implicit threads.
1246       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1247           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1248           /*ForceSimpleCall=*/true);
1249     }
1250     CGF.EmitOMPPrivateClause(S, PrivateScope);
1251     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1252     (void)PrivateScope.Privatize();
1253     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1254     CGF.EmitOMPReductionClauseFinal(S);
1255   };
1256   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
1257   emitPostUpdateForReductionClause(
1258       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1259 }
1260 
1261 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1262                                       JumpDest LoopExit) {
1263   RunCleanupsScope BodyScope(*this);
1264   // Update counters values on current iteration.
1265   for (auto I : D.updates()) {
1266     EmitIgnoredExpr(I);
1267   }
1268   // Update the linear variables.
1269   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1270     for (auto *U : C->updates())
1271       EmitIgnoredExpr(U);
1272   }
1273 
1274   // On a continue in the body, jump to the end.
1275   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1276   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1277   // Emit loop body.
1278   EmitStmt(D.getBody());
1279   // The end (updates/cleanups).
1280   EmitBlock(Continue.getBlock());
1281   BreakContinueStack.pop_back();
1282 }
1283 
1284 void CodeGenFunction::EmitOMPInnerLoop(
1285     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1286     const Expr *IncExpr,
1287     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1288     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1289   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1290 
1291   // Start the loop with a block that tests the condition.
1292   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1293   EmitBlock(CondBlock);
1294   LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());
1295 
1296   // If there are any cleanups between here and the loop-exit scope,
1297   // create a block to stage a loop exit along.
1298   auto ExitBlock = LoopExit.getBlock();
1299   if (RequiresCleanup)
1300     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1301 
1302   auto LoopBody = createBasicBlock("omp.inner.for.body");
1303 
1304   // Emit condition.
1305   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1306   if (ExitBlock != LoopExit.getBlock()) {
1307     EmitBlock(ExitBlock);
1308     EmitBranchThroughCleanup(LoopExit);
1309   }
1310 
1311   EmitBlock(LoopBody);
1312   incrementProfileCounter(&S);
1313 
1314   // Create a block for the increment.
1315   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1316   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1317 
1318   BodyGen(*this);
1319 
1320   // Emit "IV = IV + 1" and a back-edge to the condition block.
1321   EmitBlock(Continue.getBlock());
1322   EmitIgnoredExpr(IncExpr);
1323   PostIncGen(*this);
1324   BreakContinueStack.pop_back();
1325   EmitBranch(CondBlock);
1326   LoopStack.pop();
1327   // Emit the fall-through block.
1328   EmitBlock(LoopExit.getBlock());
1329 }
1330 
1331 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1332   if (!HaveInsertPoint())
1333     return;
1334   // Emit inits for the linear variables.
1335   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1336     for (auto *Init : C->inits()) {
1337       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1338       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1339         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1340         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1341         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1342                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1343                         VD->getInit()->getType(), VK_LValue,
1344                         VD->getInit()->getExprLoc());
1345         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1346                                                 VD->getType()),
1347                        /*capturedByInit=*/false);
1348         EmitAutoVarCleanups(Emission);
1349       } else
1350         EmitVarDecl(*VD);
1351     }
1352     // Emit the linear steps for the linear clauses.
1353     // If a step is not constant, it is pre-calculated before the loop.
1354     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1355       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1356         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1357         // Emit calculation of the linear step.
1358         EmitIgnoredExpr(CS);
1359       }
1360   }
1361 }
1362 
1363 void CodeGenFunction::EmitOMPLinearClauseFinal(
1364     const OMPLoopDirective &D,
1365     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1366   if (!HaveInsertPoint())
1367     return;
1368   llvm::BasicBlock *DoneBB = nullptr;
1369   // Emit the final values of the linear variables.
1370   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1371     auto IC = C->varlist_begin();
1372     for (auto *F : C->finals()) {
1373       if (!DoneBB) {
1374         if (auto *Cond = CondGen(*this)) {
1375           // If the first post-update expression is found, emit conditional
1376           // block if it was requested.
1377           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1378           DoneBB = createBasicBlock(".omp.linear.pu.done");
1379           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1380           EmitBlock(ThenBB);
1381         }
1382       }
1383       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1384       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1385                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1386                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1387       Address OrigAddr = EmitLValue(&DRE).getAddress();
1388       CodeGenFunction::OMPPrivateScope VarScope(*this);
1389       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1390       (void)VarScope.Privatize();
1391       EmitIgnoredExpr(F);
1392       ++IC;
1393     }
1394     if (auto *PostUpdate = C->getPostUpdateExpr())
1395       EmitIgnoredExpr(PostUpdate);
1396   }
1397   if (DoneBB)
1398     EmitBlock(DoneBB, /*IsFinished=*/true);
1399 }
1400 
1401 static void emitAlignedClause(CodeGenFunction &CGF,
1402                               const OMPExecutableDirective &D) {
1403   if (!CGF.HaveInsertPoint())
1404     return;
1405   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1406     unsigned ClauseAlignment = 0;
1407     if (auto AlignmentExpr = Clause->getAlignment()) {
1408       auto AlignmentCI =
1409           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1410       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1411     }
1412     for (auto E : Clause->varlists()) {
1413       unsigned Alignment = ClauseAlignment;
1414       if (Alignment == 0) {
1415         // OpenMP [2.8.1, Description]
1416         // If no optional parameter is specified, implementation-defined default
1417         // alignments for SIMD instructions on the target platforms are assumed.
1418         Alignment =
1419             CGF.getContext()
1420                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1421                     E->getType()->getPointeeType()))
1422                 .getQuantity();
1423       }
1424       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1425              "alignment is not power of 2");
1426       if (Alignment != 0) {
1427         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1428         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1429       }
1430     }
1431   }
1432 }
1433 
1434 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1435     const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1436   if (!HaveInsertPoint())
1437     return;
1438   auto I = S.private_counters().begin();
1439   for (auto *E : S.counters()) {
1440     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1441     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1442     (void)LoopScope.addPrivate(VD, [&]() -> Address {
1443       // Emit var without initialization.
1444       if (!LocalDeclMap.count(PrivateVD)) {
1445         auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1446         EmitAutoVarCleanups(VarEmission);
1447       }
1448       DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1449                       /*RefersToEnclosingVariableOrCapture=*/false,
1450                       (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1451       return EmitLValue(&DRE).getAddress();
1452     });
1453     if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1454         VD->hasGlobalStorage()) {
1455       (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1456         DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1457                         LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1458                         E->getType(), VK_LValue, E->getExprLoc());
1459         return EmitLValue(&DRE).getAddress();
1460       });
1461     }
1462     ++I;
1463   }
1464 }
1465 
1466 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1467                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1468                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1469   if (!CGF.HaveInsertPoint())
1470     return;
1471   {
1472     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1473     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1474     (void)PreCondScope.Privatize();
1475     // Get initial values of real counters.
1476     for (auto I : S.inits()) {
1477       CGF.EmitIgnoredExpr(I);
1478     }
1479   }
1480   // Check that loop is executed at least one time.
1481   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1482 }
1483 
1484 void CodeGenFunction::EmitOMPLinearClause(
1485     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1486   if (!HaveInsertPoint())
1487     return;
1488   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1489   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1490     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1491     for (auto *C : LoopDirective->counters()) {
1492       SIMDLCVs.insert(
1493           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1494     }
1495   }
1496   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1497     auto CurPrivate = C->privates().begin();
1498     for (auto *E : C->varlists()) {
1499       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1500       auto *PrivateVD =
1501           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1502       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1503         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1504           // Emit private VarDecl with copy init.
1505           EmitVarDecl(*PrivateVD);
1506           return GetAddrOfLocalVar(PrivateVD);
1507         });
1508         assert(IsRegistered && "linear var already registered as private");
1509         // Silence the warning about unused variable.
1510         (void)IsRegistered;
1511       } else
1512         EmitVarDecl(*PrivateVD);
1513       ++CurPrivate;
1514     }
1515   }
1516 }
1517 
1518 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1519                                      const OMPExecutableDirective &D,
1520                                      bool IsMonotonic) {
1521   if (!CGF.HaveInsertPoint())
1522     return;
1523   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1524     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1525                                  /*ignoreResult=*/true);
1526     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1527     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1528     // In presence of finite 'safelen', it may be unsafe to mark all
1529     // the memory instructions parallel, because loop-carried
1530     // dependences of 'safelen' iterations are possible.
1531     if (!IsMonotonic)
1532       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1533   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1534     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1535                                  /*ignoreResult=*/true);
1536     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1537     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1538     // In presence of finite 'safelen', it may be unsafe to mark all
1539     // the memory instructions parallel, because loop-carried
1540     // dependences of 'safelen' iterations are possible.
1541     CGF.LoopStack.setParallel(false);
1542   }
1543 }
1544 
1545 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1546                                       bool IsMonotonic) {
1547   // Walk clauses and process safelen/lastprivate.
1548   LoopStack.setParallel(!IsMonotonic);
1549   LoopStack.setVectorizeEnable(true);
1550   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1551 }
1552 
1553 void CodeGenFunction::EmitOMPSimdFinal(
1554     const OMPLoopDirective &D,
1555     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1556   if (!HaveInsertPoint())
1557     return;
1558   llvm::BasicBlock *DoneBB = nullptr;
1559   auto IC = D.counters().begin();
1560   auto IPC = D.private_counters().begin();
1561   for (auto F : D.finals()) {
1562     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1563     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1564     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1565     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1566         OrigVD->hasGlobalStorage() || CED) {
1567       if (!DoneBB) {
1568         if (auto *Cond = CondGen(*this)) {
1569           // If the first post-update expression is found, emit conditional
1570           // block if it was requested.
1571           auto *ThenBB = createBasicBlock(".omp.final.then");
1572           DoneBB = createBasicBlock(".omp.final.done");
1573           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1574           EmitBlock(ThenBB);
1575         }
1576       }
1577       Address OrigAddr = Address::invalid();
1578       if (CED)
1579         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1580       else {
1581         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1582                         /*RefersToEnclosingVariableOrCapture=*/false,
1583                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1584         OrigAddr = EmitLValue(&DRE).getAddress();
1585       }
1586       OMPPrivateScope VarScope(*this);
1587       VarScope.addPrivate(OrigVD,
1588                           [OrigAddr]() -> Address { return OrigAddr; });
1589       (void)VarScope.Privatize();
1590       EmitIgnoredExpr(F);
1591     }
1592     ++IC;
1593     ++IPC;
1594   }
1595   if (DoneBB)
1596     EmitBlock(DoneBB, /*IsFinished=*/true);
1597 }
1598 
1599 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1600   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1601     OMPLoopScope PreInitScope(CGF, S);
1602     // if (PreCond) {
1603     //   for (IV in 0..LastIteration) BODY;
1604     //   <Final counter/linear vars updates>;
1605     // }
1606     //
1607 
1608     // Emit: if (PreCond) - begin.
1609     // If the condition constant folds and can be elided, avoid emitting the
1610     // whole loop.
1611     bool CondConstant;
1612     llvm::BasicBlock *ContBlock = nullptr;
1613     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1614       if (!CondConstant)
1615         return;
1616     } else {
1617       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1618       ContBlock = CGF.createBasicBlock("simd.if.end");
1619       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1620                   CGF.getProfileCount(&S));
1621       CGF.EmitBlock(ThenBlock);
1622       CGF.incrementProfileCounter(&S);
1623     }
1624 
1625     // Emit the loop iteration variable.
1626     const Expr *IVExpr = S.getIterationVariable();
1627     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1628     CGF.EmitVarDecl(*IVDecl);
1629     CGF.EmitIgnoredExpr(S.getInit());
1630 
1631     // Emit the iterations count variable.
1632     // If it is not a variable, Sema decided to calculate iterations count on
1633     // each iteration (e.g., it is foldable into a constant).
1634     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1635       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1636       // Emit calculation of the iterations count.
1637       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1638     }
1639 
1640     CGF.EmitOMPSimdInit(S);
1641 
1642     emitAlignedClause(CGF, S);
1643     CGF.EmitOMPLinearClauseInit(S);
1644     {
1645       OMPPrivateScope LoopScope(CGF);
1646       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1647       CGF.EmitOMPLinearClause(S, LoopScope);
1648       CGF.EmitOMPPrivateClause(S, LoopScope);
1649       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1650       bool HasLastprivateClause =
1651           CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1652       (void)LoopScope.Privatize();
1653       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1654                            S.getInc(),
1655                            [&S](CodeGenFunction &CGF) {
1656                              CGF.EmitOMPLoopBody(S, JumpDest());
1657                              CGF.EmitStopPoint(&S);
1658                            },
1659                            [](CodeGenFunction &) {});
1660       CGF.EmitOMPSimdFinal(
1661           S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1662       // Emit final copy of the lastprivate variables at the end of loops.
1663       if (HasLastprivateClause)
1664         CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1665       CGF.EmitOMPReductionClauseFinal(S);
1666       emitPostUpdateForReductionClause(
1667           CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1668     }
1669     CGF.EmitOMPLinearClauseFinal(
1670         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1671     // Emit: if (PreCond) - end.
1672     if (ContBlock) {
1673       CGF.EmitBranch(ContBlock);
1674       CGF.EmitBlock(ContBlock, true);
1675     }
1676   };
1677   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1678   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1679 }
1680 
1681 void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
1682     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1683     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1684   auto &RT = CGM.getOpenMPRuntime();
1685 
1686   const Expr *IVExpr = S.getIterationVariable();
1687   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1688   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1689 
1690   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1691 
1692   // Start the loop with a block that tests the condition.
1693   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1694   EmitBlock(CondBlock);
1695   LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());
1696 
1697   llvm::Value *BoolCondVal = nullptr;
1698   if (!DynamicOrOrdered) {
1699     // UB = min(UB, GlobalUB)
1700     EmitIgnoredExpr(S.getEnsureUpperBound());
1701     // IV = LB
1702     EmitIgnoredExpr(S.getInit());
1703     // IV < UB
1704     BoolCondVal = EvaluateExprAsBool(S.getCond());
1705   } else {
1706     BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
1707                                  LB, UB, ST);
1708   }
1709 
1710   // If there are any cleanups between here and the loop-exit scope,
1711   // create a block to stage a loop exit along.
1712   auto ExitBlock = LoopExit.getBlock();
1713   if (LoopScope.requiresCleanups())
1714     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1715 
1716   auto LoopBody = createBasicBlock("omp.dispatch.body");
1717   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1718   if (ExitBlock != LoopExit.getBlock()) {
1719     EmitBlock(ExitBlock);
1720     EmitBranchThroughCleanup(LoopExit);
1721   }
1722   EmitBlock(LoopBody);
1723 
1724   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1725   // LB for loop condition and emitted it above).
1726   if (DynamicOrOrdered)
1727     EmitIgnoredExpr(S.getInit());
1728 
1729   // Create a block for the increment.
1730   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1731   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1732 
1733   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1734   // with dynamic/guided scheduling and without ordered clause.
1735   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1736     LoopStack.setParallel(!IsMonotonic);
1737   else
1738     EmitOMPSimdInit(S, IsMonotonic);
1739 
1740   SourceLocation Loc = S.getLocStart();
1741   EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
1742                    [&S, LoopExit](CodeGenFunction &CGF) {
1743                      CGF.EmitOMPLoopBody(S, LoopExit);
1744                      CGF.EmitStopPoint(&S);
1745                    },
1746                    [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
1747                      if (Ordered) {
1748                        CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
1749                            CGF, Loc, IVSize, IVSigned);
1750                      }
1751                    });
1752 
1753   EmitBlock(Continue.getBlock());
1754   BreakContinueStack.pop_back();
1755   if (!DynamicOrOrdered) {
1756     // Emit "LB = LB + Stride", "UB = UB + Stride".
1757     EmitIgnoredExpr(S.getNextLowerBound());
1758     EmitIgnoredExpr(S.getNextUpperBound());
1759   }
1760 
1761   EmitBranch(CondBlock);
1762   LoopStack.pop();
1763   // Emit the fall-through block.
1764   EmitBlock(LoopExit.getBlock());
1765 
1766   // Tell the runtime we are done.
1767   if (!DynamicOrOrdered)
1768     RT.emitForStaticFinish(*this, S.getLocEnd());
1769 
1770 }
1771 
1772 void CodeGenFunction::EmitOMPForOuterLoop(
1773     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1774     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1775     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1776   auto &RT = CGM.getOpenMPRuntime();
1777 
1778   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1779   const bool DynamicOrOrdered =
1780       Ordered || RT.isDynamic(ScheduleKind.Schedule);
1781 
1782   assert((Ordered ||
1783           !RT.isStaticNonchunked(ScheduleKind.Schedule,
1784                                  /*Chunked=*/Chunk != nullptr)) &&
1785          "static non-chunked schedule does not need outer loop");
1786 
1787   // Emit outer loop.
1788   //
1789   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1790   // When schedule(dynamic,chunk_size) is specified, the iterations are
1791   // distributed to threads in the team in chunks as the threads request them.
1792   // Each thread executes a chunk of iterations, then requests another chunk,
1793   // until no chunks remain to be distributed. Each chunk contains chunk_size
1794   // iterations, except for the last chunk to be distributed, which may have
1795   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1796   //
1797   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1798   // to threads in the team in chunks as the executing threads request them.
1799   // Each thread executes a chunk of iterations, then requests another chunk,
1800   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1801   // each chunk is proportional to the number of unassigned iterations divided
1802   // by the number of threads in the team, decreasing to 1. For a chunk_size
1803   // with value k (greater than 1), the size of each chunk is determined in the
1804   // same way, with the restriction that the chunks do not contain fewer than k
1805   // iterations (except for the last chunk to be assigned, which may have fewer
1806   // than k iterations).
1807   //
1808   // When schedule(auto) is specified, the decision regarding scheduling is
1809   // delegated to the compiler and/or runtime system. The programmer gives the
1810   // implementation the freedom to choose any possible mapping of iterations to
1811   // threads in the team.
1812   //
1813   // When schedule(runtime) is specified, the decision regarding scheduling is
1814   // deferred until run time, and the schedule and chunk size are taken from the
1815   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1816   // implementation defined
1817   //
1818   // while(__kmpc_dispatch_next(&LB, &UB)) {
1819   //   idx = LB;
1820   //   while (idx <= UB) { BODY; ++idx;
1821   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1822   //   } // inner loop
1823   // }
1824   //
1825   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1826   // When schedule(static, chunk_size) is specified, iterations are divided into
1827   // chunks of size chunk_size, and the chunks are assigned to the threads in
1828   // the team in a round-robin fashion in the order of the thread number.
1829   //
1830   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1831   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1832   //   LB = LB + ST;
1833   //   UB = UB + ST;
1834   // }
1835   //
1836 
1837   const Expr *IVExpr = S.getIterationVariable();
1838   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1839   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1840 
1841   if (DynamicOrOrdered) {
1842     llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
1843     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
1844                            IVSigned, Ordered, UBVal, Chunk);
1845   } else {
1846     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1847                          Ordered, IL, LB, UB, ST, Chunk);
1848   }
1849 
1850   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
1851                    ST, IL, Chunk);
1852 }
1853 
1854 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1855     OpenMPDistScheduleClauseKind ScheduleKind,
1856     const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
1857     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1858 
1859   auto &RT = CGM.getOpenMPRuntime();
1860 
1861   // Emit outer loop.
1862   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1863   // dynamic
1864   //
1865 
1866   const Expr *IVExpr = S.getIterationVariable();
1867   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1868   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1869 
1870   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
1871                               IVSize, IVSigned, /* Ordered = */ false,
1872                               IL, LB, UB, ST, Chunk);
1873 
1874   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
1875                    S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
1876 }
1877 
1878 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1879     const OMPDistributeParallelForDirective &S) {
1880   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1881   CGM.getOpenMPRuntime().emitInlinedDirective(
1882       *this, OMPD_distribute_parallel_for,
1883       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1884         OMPLoopScope PreInitScope(CGF, S);
1885         CGF.EmitStmt(
1886             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1887       });
1888 }
1889 
1890 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
1891     const OMPDistributeParallelForSimdDirective &S) {
1892   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1893   CGM.getOpenMPRuntime().emitInlinedDirective(
1894       *this, OMPD_distribute_parallel_for_simd,
1895       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1896         OMPLoopScope PreInitScope(CGF, S);
1897         CGF.EmitStmt(
1898             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1899       });
1900 }
1901 
1902 void CodeGenFunction::EmitOMPDistributeSimdDirective(
1903     const OMPDistributeSimdDirective &S) {
1904   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1905   CGM.getOpenMPRuntime().emitInlinedDirective(
1906       *this, OMPD_distribute_simd,
1907       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1908         OMPLoopScope PreInitScope(CGF, S);
1909         CGF.EmitStmt(
1910             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1911       });
1912 }
1913 
1914 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
1915     const OMPTargetParallelForSimdDirective &S) {
1916   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1917   CGM.getOpenMPRuntime().emitInlinedDirective(
1918       *this, OMPD_target_parallel_for_simd,
1919       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1920         OMPLoopScope PreInitScope(CGF, S);
1921         CGF.EmitStmt(
1922             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1923       });
1924 }
1925 
1926 void CodeGenFunction::EmitOMPTargetSimdDirective(
1927     const OMPTargetSimdDirective &S) {
1928   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1929   CGM.getOpenMPRuntime().emitInlinedDirective(
1930       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1931         OMPLoopScope PreInitScope(CGF, S);
1932         CGF.EmitStmt(
1933             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1934       });
1935 }
1936 
1937 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
1938     const OMPTeamsDistributeDirective &S) {
1939   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1940   CGM.getOpenMPRuntime().emitInlinedDirective(
1941       *this, OMPD_teams_distribute,
1942       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1943         OMPLoopScope PreInitScope(CGF, S);
1944         CGF.EmitStmt(
1945             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1946       });
1947 }
1948 
1949 /// \brief Emit a helper variable and return corresponding lvalue.
1950 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1951                                const DeclRefExpr *Helper) {
1952   auto VDecl = cast<VarDecl>(Helper->getDecl());
1953   CGF.EmitVarDecl(*VDecl);
1954   return CGF.EmitLValue(Helper);
1955 }
1956 
1957 namespace {
1958   struct ScheduleKindModifiersTy {
1959     OpenMPScheduleClauseKind Kind;
1960     OpenMPScheduleClauseModifier M1;
1961     OpenMPScheduleClauseModifier M2;
1962     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
1963                             OpenMPScheduleClauseModifier M1,
1964                             OpenMPScheduleClauseModifier M2)
1965         : Kind(Kind), M1(M1), M2(M2) {}
1966   };
1967 } // namespace
1968 
1969 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
1970   // Emit the loop iteration variable.
1971   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
1972   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
1973   EmitVarDecl(*IVDecl);
1974 
1975   // Emit the iterations count variable.
1976   // If it is not a variable, Sema decided to calculate iterations count on each
1977   // iteration (e.g., it is foldable into a constant).
1978   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1979     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1980     // Emit calculation of the iterations count.
1981     EmitIgnoredExpr(S.getCalcLastIteration());
1982   }
1983 
1984   auto &RT = CGM.getOpenMPRuntime();
1985 
1986   bool HasLastprivateClause;
1987   // Check pre-condition.
1988   {
1989     OMPLoopScope PreInitScope(*this, S);
1990     // Skip the entire loop if we don't meet the precondition.
1991     // If the condition constant folds and can be elided, avoid emitting the
1992     // whole loop.
1993     bool CondConstant;
1994     llvm::BasicBlock *ContBlock = nullptr;
1995     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1996       if (!CondConstant)
1997         return false;
1998     } else {
1999       auto *ThenBlock = createBasicBlock("omp.precond.then");
2000       ContBlock = createBasicBlock("omp.precond.end");
2001       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2002                   getProfileCount(&S));
2003       EmitBlock(ThenBlock);
2004       incrementProfileCounter(&S);
2005     }
2006 
2007     bool Ordered = false;
2008     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2009       if (OrderedClause->getNumForLoops())
2010         RT.emitDoacrossInit(*this, S);
2011       else
2012         Ordered = true;
2013     }
2014 
2015     llvm::DenseSet<const Expr *> EmittedFinals;
2016     emitAlignedClause(*this, S);
2017     EmitOMPLinearClauseInit(S);
2018     // Emit helper vars inits.
2019     LValue LB =
2020         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2021     LValue UB =
2022         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2023     LValue ST =
2024         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2025     LValue IL =
2026         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2027 
2028     // Emit 'then' code.
2029     {
2030       OMPPrivateScope LoopScope(*this);
2031       if (EmitOMPFirstprivateClause(S, LoopScope)) {
2032         // Emit implicit barrier to synchronize threads and avoid data races on
2033         // initialization of firstprivate variables and post-update of
2034         // lastprivate variables.
2035         CGM.getOpenMPRuntime().emitBarrierCall(
2036             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2037             /*ForceSimpleCall=*/true);
2038       }
2039       EmitOMPPrivateClause(S, LoopScope);
2040       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2041       EmitOMPReductionClauseInit(S, LoopScope);
2042       EmitOMPPrivateLoopCounters(S, LoopScope);
2043       EmitOMPLinearClause(S, LoopScope);
2044       (void)LoopScope.Privatize();
2045 
2046       // Detect the loop schedule kind and chunk.
2047       llvm::Value *Chunk = nullptr;
2048       OpenMPScheduleTy ScheduleKind;
2049       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2050         ScheduleKind.Schedule = C->getScheduleKind();
2051         ScheduleKind.M1 = C->getFirstScheduleModifier();
2052         ScheduleKind.M2 = C->getSecondScheduleModifier();
2053         if (const auto *Ch = C->getChunkSize()) {
2054           Chunk = EmitScalarExpr(Ch);
2055           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2056                                        S.getIterationVariable()->getType(),
2057                                        S.getLocStart());
2058         }
2059       }
2060       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2061       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2062       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2063       // If the static schedule kind is specified or if the ordered clause is
2064       // specified, and if no monotonic modifier is specified, the effect will
2065       // be as if the monotonic modifier was specified.
2066       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2067                                 /* Chunked */ Chunk != nullptr) &&
2068           !Ordered) {
2069         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2070           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2071         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2072         // When no chunk_size is specified, the iteration space is divided into
2073         // chunks that are approximately equal in size, and at most one chunk is
2074         // distributed to each thread. Note that the size of the chunks is
2075         // unspecified in this case.
2076         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
2077                              IVSize, IVSigned, Ordered,
2078                              IL.getAddress(), LB.getAddress(),
2079                              UB.getAddress(), ST.getAddress());
2080         auto LoopExit =
2081             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2082         // UB = min(UB, GlobalUB);
2083         EmitIgnoredExpr(S.getEnsureUpperBound());
2084         // IV = LB;
2085         EmitIgnoredExpr(S.getInit());
2086         // while (idx <= UB) { BODY; ++idx; }
2087         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2088                          S.getInc(),
2089                          [&S, LoopExit](CodeGenFunction &CGF) {
2090                            CGF.EmitOMPLoopBody(S, LoopExit);
2091                            CGF.EmitStopPoint(&S);
2092                          },
2093                          [](CodeGenFunction &) {});
2094         EmitBlock(LoopExit.getBlock());
2095         // Tell the runtime we are done.
2096         RT.emitForStaticFinish(*this, S.getLocStart());
2097       } else {
2098         const bool IsMonotonic =
2099             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2100             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2101             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2102             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2103         // Emit the outer loop, which requests its work chunk [LB..UB] from
2104         // runtime and runs the inner loop to process it.
2105         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2106                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
2107                             IL.getAddress(), Chunk);
2108       }
2109       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2110         EmitOMPSimdFinal(S,
2111                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2112                            return CGF.Builder.CreateIsNotNull(
2113                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2114                          });
2115       }
2116       EmitOMPReductionClauseFinal(S);
2117       // Emit post-update of the reduction variables if IsLastIter != 0.
2118       emitPostUpdateForReductionClause(
2119           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2120             return CGF.Builder.CreateIsNotNull(
2121                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2122           });
2123       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2124       if (HasLastprivateClause)
2125         EmitOMPLastprivateClauseFinal(
2126             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2127             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2128     }
2129     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2130       return CGF.Builder.CreateIsNotNull(
2131           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2132     });
2133     // We're now done with the loop, so jump to the continuation block.
2134     if (ContBlock) {
2135       EmitBranch(ContBlock);
2136       EmitBlock(ContBlock, true);
2137     }
2138   }
2139   return HasLastprivateClause;
2140 }
2141 
2142 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2143   bool HasLastprivates = false;
2144   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2145                                           PrePostActionTy &) {
2146     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
2147   };
2148   {
2149     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2150     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2151                                                 S.hasCancel());
2152   }
2153 
2154   // Emit an implicit barrier at the end.
2155   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2156     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2157   }
2158 }
2159 
2160 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2161   bool HasLastprivates = false;
2162   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2163                                           PrePostActionTy &) {
2164     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
2165   };
2166   {
2167     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2168     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2169   }
2170 
2171   // Emit an implicit barrier at the end.
2172   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2173     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2174   }
2175 }
2176 
2177 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2178                                 const Twine &Name,
2179                                 llvm::Value *Init = nullptr) {
2180   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2181   if (Init)
2182     CGF.EmitScalarInit(Init, LVal);
2183   return LVal;
2184 }
2185 
/// Emits the body shared by the 'sections' and 'parallel sections' directives.
///
/// The sections are lowered to a statically scheduled loop over a 32-bit
/// signed counter: each iteration switches on the counter and runs the
/// matching section statement. If the captured statement is a compound
/// statement, every child becomes one case; otherwise the whole statement is
/// emitted as the single case 0.
///
/// Firstprivate, private, lastprivate and reduction clauses of \p S are
/// handled around the loop. The closing barrier of the directive itself is
/// NOT emitted here, except for the 'nowait' + lastprivate case at the end.
///
/// \param S The directive whose associated captured statement holds the
/// sections.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // CS stays null when the captured statement is not a compound statement; the
  // statement is then treated as one single section below.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  // Set inside CodeGen and read after emitInlinedDirective() returns.
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Index of the last section: <number of children> - 1, or 0 for a lone
    // statement.
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions bound to IV and UB let the synthesized condition and
    // increment below refer to the helper temporaries.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      // ExitBB doubles as the switch's default destination.
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child of the compound statement, numbered in order.
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Not a compound statement: the whole statement is section 0.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' carry a cancel flag here; any
  // other directive kind leaves HasCancel false.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads after the final copy of
    // the lastprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2320 
2321 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2322   {
2323     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2324     EmitSections(S);
2325   }
2326   // Emit an implicit barrier at the end.
2327   if (!S.getSingleClause<OMPNowaitClause>()) {
2328     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2329                                            OMPD_sections);
2330   }
2331 }
2332 
2333 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2334   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2335     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2336   };
2337   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2338   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2339                                               S.hasCancel());
2340 }
2341 
2342 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2343   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2344   llvm::SmallVector<const Expr *, 8> DestExprs;
2345   llvm::SmallVector<const Expr *, 8> SrcExprs;
2346   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2347   // Check if there are any 'copyprivate' clauses associated with this
2348   // 'single' construct.
2349   // Build a list of copyprivate variables along with helper expressions
2350   // (<source>, <destination>, <destination>=<source> expressions)
2351   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2352     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2353     DestExprs.append(C->destination_exprs().begin(),
2354                      C->destination_exprs().end());
2355     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2356     AssignmentOps.append(C->assignment_ops().begin(),
2357                          C->assignment_ops().end());
2358   }
2359   // Emit code for 'single' region along with 'copyprivate' clauses
2360   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2361     Action.Enter(CGF);
2362     OMPPrivateScope SingleScope(CGF);
2363     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2364     CGF.EmitOMPPrivateClause(S, SingleScope);
2365     (void)SingleScope.Privatize();
2366     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2367   };
2368   {
2369     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2370     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2371                                             CopyprivateVars, DestExprs,
2372                                             SrcExprs, AssignmentOps);
2373   }
2374   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2375   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2376   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2377     CGM.getOpenMPRuntime().emitBarrierCall(
2378         *this, S.getLocStart(),
2379         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2380   }
2381 }
2382 
2383 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2384   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2385     Action.Enter(CGF);
2386     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2387   };
2388   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2389   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2390 }
2391 
2392 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2393   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2394     Action.Enter(CGF);
2395     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2396   };
2397   Expr *Hint = nullptr;
2398   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2399     Hint = HintClause->getHint();
2400   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2401   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2402                                             S.getDirectiveName().getAsString(),
2403                                             CodeGen, S.getLocStart(), Hint);
2404 }
2405 
2406 void CodeGenFunction::EmitOMPParallelForDirective(
2407     const OMPParallelForDirective &S) {
2408   // Emit directive as a combined directive that consists of two implicit
2409   // directives: 'parallel' with 'for' directive.
2410   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2411     CGF.EmitOMPWorksharingLoop(S);
2412   };
2413   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
2414 }
2415 
2416 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2417     const OMPParallelForSimdDirective &S) {
2418   // Emit directive as a combined directive that consists of two implicit
2419   // directives: 'parallel' with 'for' directive.
2420   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2421     CGF.EmitOMPWorksharingLoop(S);
2422   };
2423   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
2424 }
2425 
2426 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2427     const OMPParallelSectionsDirective &S) {
2428   // Emit directive as a combined directive that consists of two implicit
2429   // directives: 'parallel' with 'sections' directive.
2430   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2431     CGF.EmitSections(S);
2432   };
2433   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
2434 }
2435 
/// Emits the parts common to task-based directives.
///
/// Fills \p Data from the clauses of \p S ('final', 'priority', 'private',
/// 'firstprivate', 'lastprivate', 'depend'), outlines the task body into a
/// separate function and then hands that function to \p TaskGen.
///
/// \param S The directive; its associated statement must be a CapturedStmt.
/// \param BodyGen Invoked inside the outlined function once the private
/// copies have been mapped and the pre/post action has been entered.
/// \param TaskGen Called last, with this CodeGenFunction, the outlined
/// function and \p Data.
/// \param Data Task description. The clause-derived fields are written here;
/// Tied and NumberOfParts are passed on to emitTaskOutlinedFunction().
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  // Parameters of the captured declaration are picked by position: the first
  // one, the second one (part id) and the fifth one (TaskT).
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    // The int part flags that a priority is present; the pointer part holds
    // its value converted to a signed 32-bit integer.
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  // EmittedAsPrivate de-duplicates variables by canonical declaration.
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  // Start over for firstprivates. The set is not cleared again before the
  // lastprivate pass, so a variable already recorded as firstprivate is not
  // added to the lastprivate lists.
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  // Maps the declaration behind each destination expression to the reference
  // to the original variable; filled for every listed variable, including
  // those skipped by the de-duplication above.
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  // Body generator for the outlined task function: remaps private variables
  // to their per-task copies, then runs BodyGen.
  auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      // Parameter 3 of the captured declaration is loaded as the copy
      // function, parameter 2 as the pointer to the privates.
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      // One pointer-sized temporary is created per private, firstprivate and
      // lastprivate variable (in that order) and passed to CopyFn after
      // PrivatesPtr.
      // NOTE(review): presumably CopyFn stores the address of each private
      // copy into the matching temporary - confirm against the runtime
      // codegen.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      // Each lastprivate destination declaration is mapped to the address of
      // the original variable, obtained by emitting a reference to it.
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      // Each variable is replaced by the pointer loaded from its temporary,
      // using the variable's declared alignment.
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  // The lexical scope for S is opened only after outlining, around the call
  // to TaskGen.
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
2587 
2588 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2589   // Emit outlined function for task construct.
2590   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2591   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2592   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2593   const Expr *IfCond = nullptr;
2594   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2595     if (C->getNameModifier() == OMPD_unknown ||
2596         C->getNameModifier() == OMPD_task) {
2597       IfCond = C->getCondition();
2598       break;
2599     }
2600   }
2601 
2602   OMPTaskDataTy Data;
2603   // Check if we should emit tied or untied task.
2604   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2605   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2606     CGF.EmitStmt(CS->getCapturedStmt());
2607   };
2608   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2609                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2610                             const OMPTaskDataTy &Data) {
2611     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2612                                             SharedsTy, CapturedStruct, IfCond,
2613                                             Data);
2614   };
2615   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2616 }
2617 
2618 void CodeGenFunction::EmitOMPTaskyieldDirective(
2619     const OMPTaskyieldDirective &S) {
2620   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2621 }
2622 
2623 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2624   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2625 }
2626 
2627 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2628   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2629 }
2630 
2631 void CodeGenFunction::EmitOMPTaskgroupDirective(
2632     const OMPTaskgroupDirective &S) {
2633   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2634     Action.Enter(CGF);
2635     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2636   };
2637   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2638   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2639 }
2640 
2641 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2642   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2643     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2644       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2645                                 FlushClause->varlist_end());
2646     }
2647     return llvm::None;
2648   }(), S.getLocStart());
2649 }
2650 
2651 void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
2652   // Emit the loop iteration variable.
2653   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2654   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2655   EmitVarDecl(*IVDecl);
2656 
2657   // Emit the iterations count variable.
2658   // If it is not a variable, Sema decided to calculate iterations count on each
2659   // iteration (e.g., it is foldable into a constant).
2660   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2661     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2662     // Emit calculation of the iterations count.
2663     EmitIgnoredExpr(S.getCalcLastIteration());
2664   }
2665 
2666   auto &RT = CGM.getOpenMPRuntime();
2667 
2668   // Check pre-condition.
2669   {
2670     OMPLoopScope PreInitScope(*this, S);
2671     // Skip the entire loop if we don't meet the precondition.
2672     // If the condition constant folds and can be elided, avoid emitting the
2673     // whole loop.
2674     bool CondConstant;
2675     llvm::BasicBlock *ContBlock = nullptr;
2676     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2677       if (!CondConstant)
2678         return;
2679     } else {
2680       auto *ThenBlock = createBasicBlock("omp.precond.then");
2681       ContBlock = createBasicBlock("omp.precond.end");
2682       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2683                   getProfileCount(&S));
2684       EmitBlock(ThenBlock);
2685       incrementProfileCounter(&S);
2686     }
2687 
2688     // Emit 'then' code.
2689     {
2690       // Emit helper vars inits.
2691       LValue LB =
2692           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2693       LValue UB =
2694           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2695       LValue ST =
2696           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2697       LValue IL =
2698           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2699 
2700       OMPPrivateScope LoopScope(*this);
2701       EmitOMPPrivateLoopCounters(S, LoopScope);
2702       (void)LoopScope.Privatize();
2703 
2704       // Detect the distribute schedule kind and chunk.
2705       llvm::Value *Chunk = nullptr;
2706       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
2707       if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
2708         ScheduleKind = C->getDistScheduleKind();
2709         if (const auto *Ch = C->getChunkSize()) {
2710           Chunk = EmitScalarExpr(Ch);
2711           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2712           S.getIterationVariable()->getType(),
2713           S.getLocStart());
2714         }
2715       }
2716       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2717       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2718 
2719       // OpenMP [2.10.8, distribute Construct, Description]
2720       // If dist_schedule is specified, kind must be static. If specified,
2721       // iterations are divided into chunks of size chunk_size, chunks are
2722       // assigned to the teams of the league in a round-robin fashion in the
2723       // order of the team number. When no chunk_size is specified, the
2724       // iteration space is divided into chunks that are approximately equal
2725       // in size, and at most one chunk is distributed to each team of the
2726       // league. The size of the chunks is unspecified in this case.
2727       if (RT.isStaticNonchunked(ScheduleKind,
2728                                 /* Chunked */ Chunk != nullptr)) {
2729         RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
2730                              IVSize, IVSigned, /* Ordered = */ false,
2731                              IL.getAddress(), LB.getAddress(),
2732                              UB.getAddress(), ST.getAddress());
2733         auto LoopExit =
2734             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2735         // UB = min(UB, GlobalUB);
2736         EmitIgnoredExpr(S.getEnsureUpperBound());
2737         // IV = LB;
2738         EmitIgnoredExpr(S.getInit());
2739         // while (idx <= UB) { BODY; ++idx; }
2740         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2741                          S.getInc(),
2742                          [&S, LoopExit](CodeGenFunction &CGF) {
2743                            CGF.EmitOMPLoopBody(S, LoopExit);
2744                            CGF.EmitStopPoint(&S);
2745                          },
2746                          [](CodeGenFunction &) {});
2747         EmitBlock(LoopExit.getBlock());
2748         // Tell the runtime we are done.
2749         RT.emitForStaticFinish(*this, S.getLocStart());
2750       } else {
2751         // Emit the outer loop, which requests its work chunk [LB..UB] from
2752         // runtime and runs the inner loop to process it.
2753         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
2754                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
2755                             IL.getAddress(), Chunk);
2756       }
2757     }
2758 
2759     // We're now done with the loop, so jump to the continuation block.
2760     if (ContBlock) {
2761       EmitBranch(ContBlock);
2762       EmitBlock(ContBlock, true);
2763     }
2764   }
2765 }
2766 
2767 void CodeGenFunction::EmitOMPDistributeDirective(
2768     const OMPDistributeDirective &S) {
2769   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2770     CGF.EmitOMPDistributeLoop(S);
2771   };
2772   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2773   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2774                                               false);
2775 }
2776 
2777 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
2778                                                    const CapturedStmt *S) {
2779   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
2780   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
2781   CGF.CapturedStmtInfo = &CapStmtInfo;
2782   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
2783   Fn->addFnAttr(llvm::Attribute::NoInline);
2784   return Fn;
2785 }
2786 
2787 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
2788   if (!S.getAssociatedStmt()) {
2789     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
2790       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
2791     return;
2792   }
2793   auto *C = S.getSingleClause<OMPSIMDClause>();
2794   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
2795                                  PrePostActionTy &Action) {
2796     if (C) {
2797       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2798       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2799       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
2800       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
2801       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
2802     } else {
2803       Action.Enter(CGF);
2804       CGF.EmitStmt(
2805           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2806     }
2807   };
2808   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2809   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
2810 }
2811 
2812 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
2813                                          QualType SrcType, QualType DestType,
2814                                          SourceLocation Loc) {
2815   assert(CGF.hasScalarEvaluationKind(DestType) &&
2816          "DestType must have scalar evaluation kind.");
2817   assert(!Val.isAggregate() && "Must be a scalar or complex.");
2818   return Val.isScalar()
2819              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
2820                                         Loc)
2821              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
2822                                                  DestType, Loc);
2823 }
2824 
2825 static CodeGenFunction::ComplexPairTy
2826 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
2827                       QualType DestType, SourceLocation Loc) {
2828   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
2829          "DestType must have complex evaluation kind.");
2830   CodeGenFunction::ComplexPairTy ComplexVal;
2831   if (Val.isScalar()) {
2832     // Convert the input element to the element type of the complex.
2833     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2834     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
2835                                               DestElementType, Loc);
2836     ComplexVal = CodeGenFunction::ComplexPairTy(
2837         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
2838   } else {
2839     assert(Val.isComplex() && "Must be a scalar or complex.");
2840     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
2841     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2842     ComplexVal.first = CGF.EmitScalarConversion(
2843         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
2844     ComplexVal.second = CGF.EmitScalarConversion(
2845         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
2846   }
2847   return ComplexVal;
2848 }
2849 
2850 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
2851                                   LValue LVal, RValue RVal) {
2852   if (LVal.isGlobalReg()) {
2853     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
2854   } else {
2855     CGF.EmitAtomicStore(RVal, LVal,
2856                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2857                                  : llvm::AtomicOrdering::Monotonic,
2858                         LVal.isVolatile(), /*IsInit=*/false);
2859   }
2860 }
2861 
2862 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
2863                                          QualType RValTy, SourceLocation Loc) {
2864   switch (getEvaluationKind(LVal.getType())) {
2865   case TEK_Scalar:
2866     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
2867                                *this, RVal, RValTy, LVal.getType(), Loc)),
2868                            LVal);
2869     break;
2870   case TEK_Complex:
2871     EmitStoreOfComplex(
2872         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
2873         /*isInit=*/false);
2874     break;
2875   case TEK_Aggregate:
2876     llvm_unreachable("Must be a scalar or complex.");
2877   }
2878 }
2879 
2880 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
2881                                   const Expr *X, const Expr *V,
2882                                   SourceLocation Loc) {
2883   // v = x;
2884   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
2885   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
2886   LValue XLValue = CGF.EmitLValue(X);
2887   LValue VLValue = CGF.EmitLValue(V);
2888   RValue Res = XLValue.isGlobalReg()
2889                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
2890                    : CGF.EmitAtomicLoad(
2891                          XLValue, Loc,
2892                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2893                                   : llvm::AtomicOrdering::Monotonic,
2894                          XLValue.isVolatile());
2895   // OpenMP, 2.12.6, atomic Construct
2896   // Any atomic construct with a seq_cst clause forces the atomically
2897   // performed operation to include an implicit flush operation without a
2898   // list.
2899   if (IsSeqCst)
2900     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2901   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
2902 }
2903 
2904 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
2905                                    const Expr *X, const Expr *E,
2906                                    SourceLocation Loc) {
2907   // x = expr;
2908   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
2909   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
2910   // OpenMP, 2.12.6, atomic Construct
2911   // Any atomic construct with a seq_cst clause forces the atomically
2912   // performed operation to include an implicit flush operation without a
2913   // list.
2914   if (IsSeqCst)
2915     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2916 }
2917 
2918 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
2919                                                 RValue Update,
2920                                                 BinaryOperatorKind BO,
2921                                                 llvm::AtomicOrdering AO,
2922                                                 bool IsXLHSInRHSPart) {
2923   auto &Context = CGF.CGM.getContext();
2924   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
2925   // expression is simple and atomic is allowed for the given type for the
2926   // target platform.
2927   if (BO == BO_Comma || !Update.isScalar() ||
2928       !Update.getScalarVal()->getType()->isIntegerTy() ||
2929       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
2930                         (Update.getScalarVal()->getType() !=
2931                          X.getAddress().getElementType())) ||
2932       !X.getAddress().getElementType()->isIntegerTy() ||
2933       !Context.getTargetInfo().hasBuiltinAtomic(
2934           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
2935     return std::make_pair(false, RValue::get(nullptr));
2936 
2937   llvm::AtomicRMWInst::BinOp RMWOp;
2938   switch (BO) {
2939   case BO_Add:
2940     RMWOp = llvm::AtomicRMWInst::Add;
2941     break;
2942   case BO_Sub:
2943     if (!IsXLHSInRHSPart)
2944       return std::make_pair(false, RValue::get(nullptr));
2945     RMWOp = llvm::AtomicRMWInst::Sub;
2946     break;
2947   case BO_And:
2948     RMWOp = llvm::AtomicRMWInst::And;
2949     break;
2950   case BO_Or:
2951     RMWOp = llvm::AtomicRMWInst::Or;
2952     break;
2953   case BO_Xor:
2954     RMWOp = llvm::AtomicRMWInst::Xor;
2955     break;
2956   case BO_LT:
2957     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2958                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
2959                                    : llvm::AtomicRMWInst::Max)
2960                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
2961                                    : llvm::AtomicRMWInst::UMax);
2962     break;
2963   case BO_GT:
2964     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2965                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
2966                                    : llvm::AtomicRMWInst::Min)
2967                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
2968                                    : llvm::AtomicRMWInst::UMin);
2969     break;
2970   case BO_Assign:
2971     RMWOp = llvm::AtomicRMWInst::Xchg;
2972     break;
2973   case BO_Mul:
2974   case BO_Div:
2975   case BO_Rem:
2976   case BO_Shl:
2977   case BO_Shr:
2978   case BO_LAnd:
2979   case BO_LOr:
2980     return std::make_pair(false, RValue::get(nullptr));
2981   case BO_PtrMemD:
2982   case BO_PtrMemI:
2983   case BO_LE:
2984   case BO_GE:
2985   case BO_EQ:
2986   case BO_NE:
2987   case BO_AddAssign:
2988   case BO_SubAssign:
2989   case BO_AndAssign:
2990   case BO_OrAssign:
2991   case BO_XorAssign:
2992   case BO_MulAssign:
2993   case BO_DivAssign:
2994   case BO_RemAssign:
2995   case BO_ShlAssign:
2996   case BO_ShrAssign:
2997   case BO_Comma:
2998     llvm_unreachable("Unsupported atomic update operation");
2999   }
3000   auto *UpdateVal = Update.getScalarVal();
3001   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3002     UpdateVal = CGF.Builder.CreateIntCast(
3003         IC, X.getAddress().getElementType(),
3004         X.getType()->hasSignedIntegerRepresentation());
3005   }
3006   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3007   return std::make_pair(true, RValue::get(Res));
3008 }
3009 
3010 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3011     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3012     llvm::AtomicOrdering AO, SourceLocation Loc,
3013     const llvm::function_ref<RValue(RValue)> &CommonGen) {
3014   // Update expressions are allowed to have the following forms:
3015   // x binop= expr; -> xrval + expr;
3016   // x++, ++x -> xrval + 1;
3017   // x--, --x -> xrval - 1;
3018   // x = x binop expr; -> xrval binop expr
3019   // x = expr Op x; - > expr binop xrval;
3020   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3021   if (!Res.first) {
3022     if (X.isGlobalReg()) {
3023       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3024       // 'xrval'.
3025       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3026     } else {
3027       // Perform compare-and-swap procedure.
3028       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3029     }
3030   }
3031   return Res;
3032 }
3033 
3034 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3035                                     const Expr *X, const Expr *E,
3036                                     const Expr *UE, bool IsXLHSInRHSPart,
3037                                     SourceLocation Loc) {
3038   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3039          "Update expr in 'atomic update' must be a binary operator.");
3040   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3041   // Update expressions are allowed to have the following forms:
3042   // x binop= expr; -> xrval + expr;
3043   // x++, ++x -> xrval + 1;
3044   // x--, --x -> xrval - 1;
3045   // x = x binop expr; -> xrval binop expr
3046   // x = expr Op x; - > expr binop xrval;
3047   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3048   LValue XLValue = CGF.EmitLValue(X);
3049   RValue ExprRValue = CGF.EmitAnyExpr(E);
3050   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3051                      : llvm::AtomicOrdering::Monotonic;
3052   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3053   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3054   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3055   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3056   auto Gen =
3057       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3058         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3059         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3060         return CGF.EmitAnyExpr(UE);
3061       };
3062   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3063       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3064   // OpenMP, 2.12.6, atomic Construct
3065   // Any atomic construct with a seq_cst clause forces the atomically
3066   // performed operation to include an implicit flush operation without a
3067   // list.
3068   if (IsSeqCst)
3069     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3070 }
3071 
3072 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3073                             QualType SourceType, QualType ResType,
3074                             SourceLocation Loc) {
3075   switch (CGF.getEvaluationKind(ResType)) {
3076   case TEK_Scalar:
3077     return RValue::get(
3078         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3079   case TEK_Complex: {
3080     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3081     return RValue::getComplex(Res.first, Res.second);
3082   }
3083   case TEK_Aggregate:
3084     break;
3085   }
3086   llvm_unreachable("Must be a scalar or complex.");
3087 }
3088 
3089 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3090                                      bool IsPostfixUpdate, const Expr *V,
3091                                      const Expr *X, const Expr *E,
3092                                      const Expr *UE, bool IsXLHSInRHSPart,
3093                                      SourceLocation Loc) {
3094   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3095   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3096   RValue NewVVal;
3097   LValue VLValue = CGF.EmitLValue(V);
3098   LValue XLValue = CGF.EmitLValue(X);
3099   RValue ExprRValue = CGF.EmitAnyExpr(E);
3100   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3101                      : llvm::AtomicOrdering::Monotonic;
3102   QualType NewVValType;
3103   if (UE) {
3104     // 'x' is updated with some additional value.
3105     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3106            "Update expr in 'atomic capture' must be a binary operator.");
3107     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3108     // Update expressions are allowed to have the following forms:
3109     // x binop= expr; -> xrval + expr;
3110     // x++, ++x -> xrval + 1;
3111     // x--, --x -> xrval - 1;
3112     // x = x binop expr; -> xrval binop expr
3113     // x = expr Op x; - > expr binop xrval;
3114     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3115     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3116     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3117     NewVValType = XRValExpr->getType();
3118     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3119     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3120                   IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
3121       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3122       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3123       RValue Res = CGF.EmitAnyExpr(UE);
3124       NewVVal = IsPostfixUpdate ? XRValue : Res;
3125       return Res;
3126     };
3127     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3128         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3129     if (Res.first) {
3130       // 'atomicrmw' instruction was generated.
3131       if (IsPostfixUpdate) {
3132         // Use old value from 'atomicrmw'.
3133         NewVVal = Res.second;
3134       } else {
3135         // 'atomicrmw' does not provide new value, so evaluate it using old
3136         // value of 'x'.
3137         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3138         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3139         NewVVal = CGF.EmitAnyExpr(UE);
3140       }
3141     }
3142   } else {
3143     // 'x' is simply rewritten with some 'expr'.
3144     NewVValType = X->getType().getNonReferenceType();
3145     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3146                                X->getType().getNonReferenceType(), Loc);
3147     auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
3148       NewVVal = XRValue;
3149       return ExprRValue;
3150     };
3151     // Try to perform atomicrmw xchg, otherwise simple exchange.
3152     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3153         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3154         Loc, Gen);
3155     if (Res.first) {
3156       // 'atomicrmw' instruction was generated.
3157       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3158     }
3159   }
3160   // Emit post-update store to 'v' of old/new 'x' value.
3161   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3162   // OpenMP, 2.12.6, atomic Construct
3163   // Any atomic construct with a seq_cst clause forces the atomically
3164   // performed operation to include an implicit flush operation without a
3165   // list.
3166   if (IsSeqCst)
3167     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3168 }
3169 
3170 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3171                               bool IsSeqCst, bool IsPostfixUpdate,
3172                               const Expr *X, const Expr *V, const Expr *E,
3173                               const Expr *UE, bool IsXLHSInRHSPart,
3174                               SourceLocation Loc) {
3175   switch (Kind) {
3176   case OMPC_read:
3177     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3178     break;
3179   case OMPC_write:
3180     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3181     break;
3182   case OMPC_unknown:
3183   case OMPC_update:
3184     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3185     break;
3186   case OMPC_capture:
3187     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3188                              IsXLHSInRHSPart, Loc);
3189     break;
3190   case OMPC_if:
3191   case OMPC_final:
3192   case OMPC_num_threads:
3193   case OMPC_private:
3194   case OMPC_firstprivate:
3195   case OMPC_lastprivate:
3196   case OMPC_reduction:
3197   case OMPC_safelen:
3198   case OMPC_simdlen:
3199   case OMPC_collapse:
3200   case OMPC_default:
3201   case OMPC_seq_cst:
3202   case OMPC_shared:
3203   case OMPC_linear:
3204   case OMPC_aligned:
3205   case OMPC_copyin:
3206   case OMPC_copyprivate:
3207   case OMPC_flush:
3208   case OMPC_proc_bind:
3209   case OMPC_schedule:
3210   case OMPC_ordered:
3211   case OMPC_nowait:
3212   case OMPC_untied:
3213   case OMPC_threadprivate:
3214   case OMPC_depend:
3215   case OMPC_mergeable:
3216   case OMPC_device:
3217   case OMPC_threads:
3218   case OMPC_simd:
3219   case OMPC_map:
3220   case OMPC_num_teams:
3221   case OMPC_thread_limit:
3222   case OMPC_priority:
3223   case OMPC_grainsize:
3224   case OMPC_nogroup:
3225   case OMPC_num_tasks:
3226   case OMPC_hint:
3227   case OMPC_dist_schedule:
3228   case OMPC_defaultmap:
3229   case OMPC_uniform:
3230   case OMPC_to:
3231   case OMPC_from:
3232   case OMPC_use_device_ptr:
3233   case OMPC_is_device_ptr:
3234     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3235   }
3236 }
3237 
3238 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3239   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3240   OpenMPClauseKind Kind = OMPC_unknown;
3241   for (auto *C : S.clauses()) {
3242     // Find first clause (skip seq_cst clause, if it is first).
3243     if (C->getClauseKind() != OMPC_seq_cst) {
3244       Kind = C->getClauseKind();
3245       break;
3246     }
3247   }
3248 
3249   const auto *CS =
3250       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3251   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3252     enterFullExpression(EWC);
3253   }
3254   // Processing for statements under 'atomic capture'.
3255   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3256     for (const auto *C : Compound->body()) {
3257       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3258         enterFullExpression(EWC);
3259       }
3260     }
3261   }
3262 
3263   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3264                                             PrePostActionTy &) {
3265     CGF.EmitStopPoint(CS);
3266     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3267                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3268                       S.isXLHSInRHSPart(), S.getLocStart());
3269   };
3270   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3271   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3272 }
3273 
3274 std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
3275 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
3276     CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
3277     bool IsOffloadEntry) {
3278   llvm::Function *OutlinedFn = nullptr;
3279   llvm::Constant *OutlinedFnID = nullptr;
3280   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3281     OMPPrivateScope PrivateScope(CGF);
3282     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3283     CGF.EmitOMPPrivateClause(S, PrivateScope);
3284     (void)PrivateScope.Privatize();
3285 
3286     Action.Enter(CGF);
3287     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3288   };
3289   // Emit target region as a standalone region.
3290   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3291       S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
3292   return std::make_pair(OutlinedFn, OutlinedFnID);
3293 }
3294 
3295 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3296   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3297 
3298   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3299   GenerateOpenMPCapturedVars(CS, CapturedVars);
3300 
3301   llvm::Function *Fn = nullptr;
3302   llvm::Constant *FnID = nullptr;
3303 
3304   // Check if we have any if clause associated with the directive.
3305   const Expr *IfCond = nullptr;
3306 
3307   if (auto *C = S.getSingleClause<OMPIfClause>()) {
3308     IfCond = C->getCondition();
3309   }
3310 
3311   // Check if we have any device clause associated with the directive.
3312   const Expr *Device = nullptr;
3313   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3314     Device = C->getDevice();
3315   }
3316 
3317   // Check if we have an if clause whose conditional always evaluates to false
3318   // or if we do not have any targets specified. If so the target region is not
3319   // an offload entry point.
3320   bool IsOffloadEntry = true;
3321   if (IfCond) {
3322     bool Val;
3323     if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3324       IsOffloadEntry = false;
3325   }
3326   if (CGM.getLangOpts().OMPTargetTriples.empty())
3327     IsOffloadEntry = false;
3328 
3329   assert(CurFuncDecl && "No parent declaration for target region!");
3330   StringRef ParentName;
3331   // In case we have Ctors/Dtors we use the complete type variant to produce
3332   // the mangling of the device outlined kernel.
3333   if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
3334     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3335   else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
3336     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3337   else
3338     ParentName =
3339         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
3340 
3341   std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
3342       CGM, S, ParentName, IsOffloadEntry);
3343   OMPLexicalScope Scope(*this, S);
3344   CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
3345                                         CapturedVars);
3346 }
3347 
3348 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3349                                         const OMPExecutableDirective &S,
3350                                         OpenMPDirectiveKind InnermostKind,
3351                                         const RegionCodeGenTy &CodeGen) {
3352   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3353   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
3354       emitParallelOrTeamsOutlinedFunction(S,
3355           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3356 
3357   const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
3358   const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
3359   const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
3360   if (NT || TL) {
3361     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3362     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3363 
3364     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3365                                                   S.getLocStart());
3366   }
3367 
3368   OMPLexicalScope Scope(CGF, S);
3369   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3370   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3371   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3372                                            CapturedVars);
3373 }
3374 
3375 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3376   // Emit parallel region as a standalone region.
3377   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3378     OMPPrivateScope PrivateScope(CGF);
3379     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3380     CGF.EmitOMPPrivateClause(S, PrivateScope);
3381     (void)PrivateScope.Privatize();
3382     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3383   };
3384   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3385 }
3386 
3387 void CodeGenFunction::EmitOMPCancellationPointDirective(
3388     const OMPCancellationPointDirective &S) {
3389   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3390                                                    S.getCancelRegion());
3391 }
3392 
3393 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3394   const Expr *IfCond = nullptr;
3395   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3396     if (C->getNameModifier() == OMPD_unknown ||
3397         C->getNameModifier() == OMPD_cancel) {
3398       IfCond = C->getCondition();
3399       break;
3400     }
3401   }
3402   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3403                                         S.getCancelRegion());
3404 }
3405 
3406 CodeGenFunction::JumpDest
3407 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3408   if (Kind == OMPD_parallel || Kind == OMPD_task)
3409     return ReturnBlock;
3410   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3411          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
3412   return BreakContinueStack.back().BreakBlock;
3413 }
3414 
3415 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3416     const OMPClause &NC, OMPPrivateScope &PrivateScope,
3417     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3418   const auto &C = cast<OMPUseDevicePtrClause>(NC);
3419   auto OrigVarIt = C.varlist_begin();
3420   auto InitIt = C.inits().begin();
3421   for (auto PvtVarIt : C.private_copies()) {
3422     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3423     auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
3424     auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
3425 
3426     // In order to identify the right initializer we need to match the
3427     // declaration used by the mapping logic. In some cases we may get
3428     // OMPCapturedExprDecl that refers to the original declaration.
3429     const ValueDecl *MatchingVD = OrigVD;
3430     if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
3431       // OMPCapturedExprDecl are used to privative fields of the current
3432       // structure.
3433       auto *ME = cast<MemberExpr>(OED->getInit());
3434       assert(isa<CXXThisExpr>(ME->getBase()) &&
3435              "Base should be the current struct!");
3436       MatchingVD = ME->getMemberDecl();
3437     }
3438 
3439     // If we don't have information about the current list item, move on to
3440     // the next one.
3441     auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
3442     if (InitAddrIt == CaptureDeviceAddrMap.end())
3443       continue;
3444 
3445     bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
3446       // Initialize the temporary initialization variable with the address we
3447       // get from the runtime library. We have to cast the source address
3448       // because it is always a void *. References are materialized in the
3449       // privatization scope, so the initialization here disregards the fact
3450       // the original variable is a reference.
3451       QualType AddrQTy =
3452           getContext().getPointerType(OrigVD->getType().getNonReferenceType());
3453       llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
3454       Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
3455       setAddrOfLocalVar(InitVD, InitAddr);
3456 
3457       // Emit private declaration, it will be initialized by the value we
3458       // declaration we just added to the local declarations map.
3459       EmitDecl(*PvtVD);
3460 
3461       // The initialization variables reached its purpose in the emission
3462       // ofthe previous declaration, so we don't need it anymore.
3463       LocalDeclMap.erase(InitVD);
3464 
3465       // Return the address of the private variable.
3466       return GetAddrOfLocalVar(PvtVD);
3467     });
3468     assert(IsRegistered && "firstprivate var already registered as private");
3469     // Silence the warning about unused variable.
3470     (void)IsRegistered;
3471 
3472     ++OrigVarIt;
3473     ++InitIt;
3474   }
3475 }
3476 
3477 // Generate the instructions for '#pragma omp target data' directive.
3478 void CodeGenFunction::EmitOMPTargetDataDirective(
3479     const OMPTargetDataDirective &S) {
3480   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
3481 
3482   // Create a pre/post action to signal the privatization of the device pointer.
3483   // This action can be replaced by the OpenMP runtime code generation to
3484   // deactivate privatization.
3485   bool PrivatizeDevicePointers = false;
3486   class DevicePointerPrivActionTy : public PrePostActionTy {
3487     bool &PrivatizeDevicePointers;
3488 
3489   public:
3490     explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
3491         : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
3492     void Enter(CodeGenFunction &CGF) override {
3493       PrivatizeDevicePointers = true;
3494     }
3495   };
3496   DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
3497 
3498   auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
3499       CodeGenFunction &CGF, PrePostActionTy &Action) {
3500     auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3501       CGF.EmitStmt(
3502           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3503     };
3504 
3505     // Codegen that selects wheather to generate the privatization code or not.
3506     auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
3507                           &InnermostCodeGen](CodeGenFunction &CGF,
3508                                              PrePostActionTy &Action) {
3509       RegionCodeGenTy RCG(InnermostCodeGen);
3510       PrivatizeDevicePointers = false;
3511 
3512       // Call the pre-action to change the status of PrivatizeDevicePointers if
3513       // needed.
3514       Action.Enter(CGF);
3515 
3516       if (PrivatizeDevicePointers) {
3517         OMPPrivateScope PrivateScope(CGF);
3518         // Emit all instances of the use_device_ptr clause.
3519         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
3520           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
3521                                         Info.CaptureDeviceAddrMap);
3522         (void)PrivateScope.Privatize();
3523         RCG(CGF);
3524       } else
3525         RCG(CGF);
3526     };
3527 
3528     // Forward the provided action to the privatization codegen.
3529     RegionCodeGenTy PrivRCG(PrivCodeGen);
3530     PrivRCG.setAction(Action);
3531 
3532     // Notwithstanding the body of the region is emitted as inlined directive,
3533     // we don't use an inline scope as changes in the references inside the
3534     // region are expected to be visible outside, so we do not privative them.
3535     OMPLexicalScope Scope(CGF, S);
3536     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
3537                                                     PrivRCG);
3538   };
3539 
3540   RegionCodeGenTy RCG(CodeGen);
3541 
3542   // If we don't have target devices, don't bother emitting the data mapping
3543   // code.
3544   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
3545     RCG(*this);
3546     return;
3547   }
3548 
3549   // Check if we have any if clause associated with the directive.
3550   const Expr *IfCond = nullptr;
3551   if (auto *C = S.getSingleClause<OMPIfClause>())
3552     IfCond = C->getCondition();
3553 
3554   // Check if we have any device clause associated with the directive.
3555   const Expr *Device = nullptr;
3556   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3557     Device = C->getDevice();
3558 
3559   // Set the action to signal privatization of device pointers.
3560   RCG.setAction(PrivAction);
3561 
3562   // Emit region code.
3563   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
3564                                              Info);
3565 }
3566 
3567 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
3568     const OMPTargetEnterDataDirective &S) {
3569   // If we don't have target devices, don't bother emitting the data mapping
3570   // code.
3571   if (CGM.getLangOpts().OMPTargetTriples.empty())
3572     return;
3573 
3574   // Check if we have any if clause associated with the directive.
3575   const Expr *IfCond = nullptr;
3576   if (auto *C = S.getSingleClause<OMPIfClause>())
3577     IfCond = C->getCondition();
3578 
3579   // Check if we have any device clause associated with the directive.
3580   const Expr *Device = nullptr;
3581   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3582     Device = C->getDevice();
3583 
3584   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3585 }
3586 
3587 void CodeGenFunction::EmitOMPTargetExitDataDirective(
3588     const OMPTargetExitDataDirective &S) {
3589   // If we don't have target devices, don't bother emitting the data mapping
3590   // code.
3591   if (CGM.getLangOpts().OMPTargetTriples.empty())
3592     return;
3593 
3594   // Check if we have any if clause associated with the directive.
3595   const Expr *IfCond = nullptr;
3596   if (auto *C = S.getSingleClause<OMPIfClause>())
3597     IfCond = C->getCondition();
3598 
3599   // Check if we have any device clause associated with the directive.
3600   const Expr *Device = nullptr;
3601   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3602     Device = C->getDevice();
3603 
3604   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3605 }
3606 
3607 void CodeGenFunction::EmitOMPTargetParallelDirective(
3608     const OMPTargetParallelDirective &S) {
3609   // TODO: codegen for target parallel.
3610 }
3611 
3612 void CodeGenFunction::EmitOMPTargetParallelForDirective(
3613     const OMPTargetParallelForDirective &S) {
3614   // TODO: codegen for target parallel for.
3615 }
3616 
3617 /// Emit a helper variable and return corresponding lvalue.
3618 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
3619                      const ImplicitParamDecl *PVD,
3620                      CodeGenFunction::OMPPrivateScope &Privates) {
3621   auto *VDecl = cast<VarDecl>(Helper->getDecl());
3622   Privates.addPrivate(
3623       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
3624 }
3625 
3626 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
3627   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
3628   // Emit outlined function for task construct.
3629   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3630   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3631   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3632   const Expr *IfCond = nullptr;
3633   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3634     if (C->getNameModifier() == OMPD_unknown ||
3635         C->getNameModifier() == OMPD_taskloop) {
3636       IfCond = C->getCondition();
3637       break;
3638     }
3639   }
3640 
3641   OMPTaskDataTy Data;
3642   // Check if taskloop must be emitted without taskgroup.
3643   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
3644   // TODO: Check if we should emit tied or untied task.
3645   Data.Tied = true;
3646   // Set scheduling for taskloop
3647   if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
3648     // grainsize clause
3649     Data.Schedule.setInt(/*IntVal=*/false);
3650     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
3651   } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
3652     // num_tasks clause
3653     Data.Schedule.setInt(/*IntVal=*/true);
3654     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
3655   }
3656 
3657   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
3658     // if (PreCond) {
3659     //   for (IV in 0..LastIteration) BODY;
3660     //   <Final counter/linear vars updates>;
3661     // }
3662     //
3663 
3664     // Emit: if (PreCond) - begin.
3665     // If the condition constant folds and can be elided, avoid emitting the
3666     // whole loop.
3667     bool CondConstant;
3668     llvm::BasicBlock *ContBlock = nullptr;
3669     OMPLoopScope PreInitScope(CGF, S);
3670     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3671       if (!CondConstant)
3672         return;
3673     } else {
3674       auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
3675       ContBlock = CGF.createBasicBlock("taskloop.if.end");
3676       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
3677                   CGF.getProfileCount(&S));
3678       CGF.EmitBlock(ThenBlock);
3679       CGF.incrementProfileCounter(&S);
3680     }
3681 
3682     if (isOpenMPSimdDirective(S.getDirectiveKind()))
3683       CGF.EmitOMPSimdInit(S);
3684 
3685     OMPPrivateScope LoopScope(CGF);
3686     // Emit helper vars inits.
3687     enum { LowerBound = 5, UpperBound, Stride, LastIter };
3688     auto *I = CS->getCapturedDecl()->param_begin();
3689     auto *LBP = std::next(I, LowerBound);
3690     auto *UBP = std::next(I, UpperBound);
3691     auto *STP = std::next(I, Stride);
3692     auto *LIP = std::next(I, LastIter);
3693     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
3694              LoopScope);
3695     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
3696              LoopScope);
3697     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
3698     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
3699              LoopScope);
3700     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
3701     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3702     (void)LoopScope.Privatize();
3703     // Emit the loop iteration variable.
3704     const Expr *IVExpr = S.getIterationVariable();
3705     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
3706     CGF.EmitVarDecl(*IVDecl);
3707     CGF.EmitIgnoredExpr(S.getInit());
3708 
3709     // Emit the iterations count variable.
3710     // If it is not a variable, Sema decided to calculate iterations count on
3711     // each iteration (e.g., it is foldable into a constant).
3712     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3713       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3714       // Emit calculation of the iterations count.
3715       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
3716     }
3717 
3718     CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
3719                          S.getInc(),
3720                          [&S](CodeGenFunction &CGF) {
3721                            CGF.EmitOMPLoopBody(S, JumpDest());
3722                            CGF.EmitStopPoint(&S);
3723                          },
3724                          [](CodeGenFunction &) {});
3725     // Emit: if (PreCond) - end.
3726     if (ContBlock) {
3727       CGF.EmitBranch(ContBlock);
3728       CGF.EmitBlock(ContBlock, true);
3729     }
3730     // Emit final copy of the lastprivate variables if IsLastIter != 0.
3731     if (HasLastprivateClause) {
3732       CGF.EmitOMPLastprivateClauseFinal(
3733           S, isOpenMPSimdDirective(S.getDirectiveKind()),
3734           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
3735               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
3736               (*LIP)->getType(), S.getLocStart())));
3737     }
3738   };
3739   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
3740                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
3741                             const OMPTaskDataTy &Data) {
3742     auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
3743       OMPLoopScope PreInitScope(CGF, S);
3744       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
3745                                                   OutlinedFn, SharedsTy,
3746                                                   CapturedStruct, IfCond, Data);
3747     };
3748     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
3749                                                     CodeGen);
3750   };
3751   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
3752 }
3753 
3754 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
3755   EmitOMPTaskLoopBasedDirective(S);
3756 }
3757 
3758 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
3759     const OMPTaskLoopSimdDirective &S) {
3760   EmitOMPTaskLoopBasedDirective(S);
3761 }
3762 
3763 // Generate the instructions for '#pragma omp target update' directive.
3764 void CodeGenFunction::EmitOMPTargetUpdateDirective(
3765     const OMPTargetUpdateDirective &S) {
3766   // If we don't have target devices, don't bother emitting the data mapping
3767   // code.
3768   if (CGM.getLangOpts().OMPTargetTriples.empty())
3769     return;
3770 
3771   // Check if we have any if clause associated with the directive.
3772   const Expr *IfCond = nullptr;
3773   if (auto *C = S.getSingleClause<OMPIfClause>())
3774     IfCond = C->getCondition();
3775 
3776   // Check if we have any device clause associated with the directive.
3777   const Expr *Device = nullptr;
3778   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3779     Device = C->getDevice();
3780 
3781   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3782 }
3783