1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
  /// Emit the declarations hanging off the directive's clause pre-init
  /// statements. Decls carrying OMPCaptureNoInitAttr get storage and cleanups
  /// only (no initializer is emitted); all others are emitted normally.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              // Allocate and register cleanups, but skip the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  // Remaps captured variables to their shared addresses when the directive is
  // emitted inline (see the AsInlined constructor parameter).
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the current context (lambda capture,
  /// captured-statement capture, or a block).
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  /// \param AsInlined When true, the directive's associated captured statement
  ///        is emitted inline, so each captured variable is privatized to the
  ///        address produced by evaluating a DeclRefExpr in this context.
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            // Build a reference to the variable as seen from this context;
            // the DRE is only used inside the lambda below, which addPrivate
            // evaluates before the DRE goes out of scope.
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};
83 
84 /// Private scope for OpenMP loop-based directives, that supports capturing
85 /// of used expression from loop statement.
86 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
87   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
88     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
89       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
90         for (const auto *I : PreInits->decls())
91           CGF.EmitVarDecl(cast<VarDecl>(*I));
92       }
93     }
94   }
95 
96 public:
97   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
98       : CodeGenFunction::RunCleanupsScope(CGF) {
99     emitPreInitStmt(CGF, S);
100   }
101 };
102 
103 } // namespace
104 
105 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
106   auto &C = getContext();
107   llvm::Value *Size = nullptr;
108   auto SizeInChars = C.getTypeSizeInChars(Ty);
109   if (SizeInChars.isZero()) {
110     // getTypeSizeInChars() returns 0 for a VLA.
111     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
112       llvm::Value *ArraySize;
113       std::tie(ArraySize, Ty) = getVLASize(VAT);
114       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
115     }
116     SizeInChars = C.getTypeSizeInChars(Ty);
117     if (SizeInChars.isZero())
118       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
119     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
120   } else
121     Size = CGM.getSize(SizeInChars);
122   return Size;
123 }
124 
/// Collect the values to pass to the OpenMP outlined function for each capture
/// of \p S, in field order: VLA sizes, 'this', by-copy values (non-pointer
/// ones laundered through a uintptr-sized temporary), and addresses for
/// by-reference captures.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  // Walk the capture inits, record fields, and captures in lockstep; they are
  // parallel sequences describing the same captures.
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously computed VLA dimension size directly.
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // View the uintptr temporary through a pointer to the field's type so
        // the value can be stored with its natural representation.
        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}
171 
172 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
173                                     StringRef Name, LValue AddrLV,
174                                     bool isReferenceType = false) {
175   ASTContext &Ctx = CGF.getContext();
176 
177   auto *CastedPtr = CGF.EmitScalarConversion(
178       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
179       Ctx.getPointerType(DstType), SourceLocation());
180   auto TmpAddr =
181       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
182           .getAddress();
183 
184   // If we are dealing with references we need to return the address of the
185   // reference instead of the reference of the value.
186   if (isReferenceType) {
187     QualType RefType = Ctx.getLValueReferenceType(DstType);
188     auto *RefVal = TmpAddr.getPointer();
189     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
190     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
191     CGF.EmitScalarInit(RefVal, TmpLVal);
192   }
193 
194   return TmpAddr;
195 }
196 
197 llvm::Function *
198 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
199   assert(
200       CapturedStmtInfo &&
201       "CapturedStmtInfo should be set when generating the captured function");
202   const CapturedDecl *CD = S.getCapturedDecl();
203   const RecordDecl *RD = S.getCapturedRecordDecl();
204   assert(CD->hasBody() && "missing CapturedDecl body");
205 
206   // Build the argument list.
207   ASTContext &Ctx = CGM.getContext();
208   FunctionArgList Args;
209   Args.append(CD->param_begin(),
210               std::next(CD->param_begin(), CD->getContextParamPosition()));
211   auto I = S.captures().begin();
212   for (auto *FD : RD->fields()) {
213     QualType ArgType = FD->getType();
214     IdentifierInfo *II = nullptr;
215     VarDecl *CapVar = nullptr;
216 
217     // If this is a capture by copy and the type is not a pointer, the outlined
218     // function argument type should be uintptr and the value properly casted to
219     // uintptr. This is necessary given that the runtime library is only able to
220     // deal with pointers. We can pass in the same way the VLA type sizes to the
221     // outlined function.
222     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
223         I->capturesVariableArrayType())
224       ArgType = Ctx.getUIntPtrType();
225 
226     if (I->capturesVariable() || I->capturesVariableByCopy()) {
227       CapVar = I->getCapturedVar();
228       II = CapVar->getIdentifier();
229     } else if (I->capturesThis())
230       II = &getContext().Idents.get("this");
231     else {
232       assert(I->capturesVariableArrayType());
233       II = &getContext().Idents.get("vla");
234     }
235     if (ArgType->isVariablyModifiedType())
236       ArgType = getContext().getVariableArrayDecayedType(ArgType);
237     Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
238                                              FD->getLocation(), II, ArgType));
239     ++I;
240   }
241   Args.append(
242       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
243       CD->param_end());
244 
245   // Create the function declaration.
246   FunctionType::ExtInfo ExtInfo;
247   const CGFunctionInfo &FuncInfo =
248       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
249   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
250 
251   llvm::Function *F = llvm::Function::Create(
252       FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
253       CapturedStmtInfo->getHelperName(), &CGM.getModule());
254   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
255   if (CD->isNothrow())
256     F->addFnAttr(llvm::Attribute::NoUnwind);
257 
258   // Generate the function.
259   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
260                 CD->getBody()->getLocStart());
261   unsigned Cnt = CD->getContextParamPosition();
262   I = S.captures().begin();
263   for (auto *FD : RD->fields()) {
264     // If we are capturing a pointer by copy we don't need to do anything, just
265     // use the value that we get from the arguments.
266     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
267       const VarDecl *CurVD = I->getCapturedVar();
268       Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
269       // If the variable is a reference we need to materialize it here.
270       if (CurVD->getType()->isReferenceType()) {
271         Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
272                                         ".materialized_ref");
273         EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
274                           CurVD->getType());
275         LocalAddr = RefAddr;
276       }
277       setAddrOfLocalVar(CurVD, LocalAddr);
278       ++Cnt;
279       ++I;
280       continue;
281     }
282 
283     LValue ArgLVal =
284         MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
285                        AlignmentSource::Decl);
286     if (FD->hasCapturedVLAType()) {
287       LValue CastedArgLVal =
288           MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
289                                               Args[Cnt]->getName(), ArgLVal),
290                          FD->getType(), AlignmentSource::Decl);
291       auto *ExprArg =
292           EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
293       auto VAT = FD->getCapturedVLAType();
294       VLASizeMap[VAT->getSizeExpr()] = ExprArg;
295     } else if (I->capturesVariable()) {
296       auto *Var = I->getCapturedVar();
297       QualType VarTy = Var->getType();
298       Address ArgAddr = ArgLVal.getAddress();
299       if (!VarTy->isReferenceType()) {
300         ArgAddr = EmitLoadOfReference(
301             ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
302       }
303       setAddrOfLocalVar(
304           Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
305     } else if (I->capturesVariableByCopy()) {
306       assert(!FD->getType()->isAnyPointerType() &&
307              "Not expecting a captured pointer.");
308       auto *Var = I->getCapturedVar();
309       QualType VarTy = Var->getType();
310       setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
311                                                   Args[Cnt]->getName(), ArgLVal,
312                                                   VarTy->isReferenceType()));
313     } else {
314       // If 'this' is captured, load it into CXXThisValue.
315       assert(I->capturesThis());
316       CXXThisValue =
317           EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
318     }
319     ++Cnt;
320     ++I;
321   }
322 
323   PGO.assignRegionCounters(GlobalDecl(CD), F);
324   CapturedStmtInfo->EmitBody(*this, CD->getBody());
325   FinishFunction(CD->getBodyRBrace());
326 
327   return F;
328 }
329 
330 //===----------------------------------------------------------------------===//
331 //                              OpenMP Directive Emission
332 //===----------------------------------------------------------------------===//
/// Emit an element-by-element copy loop over two arrays of \p OriginalType,
/// invoking \p CopyGen with the (dest, src) element addresses each iteration.
/// The loop is structured as: empty-check, body with two PHIs walking both
/// arrays in lockstep, done.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHI over the source element pointer; the back-edge incoming value is
  // added after the body is emitted.
  llvm::PHINode *SrcElementPHI =
    Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // PHI over the destination element pointer, walked in lockstep with the
  // source.
  llvm::PHINode *DestElementPHI =
    Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
    Address(DestElementPHI,
            DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Close the PHIs with the back-edge values; GetInsertBlock() is used because
  // CopyGen may have emitted additional blocks.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
393 
394 /// Check if the combiner is a call to UDR combiner and if it is so return the
395 /// UDR decl used for reduction.
396 static const OMPDeclareReductionDecl *
397 getReductionInit(const Expr *ReductionOp) {
398   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
399     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
400       if (auto *DRE =
401               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
402         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
403           return DRD;
404   return nullptr;
405 }
406 
/// Initialize \p Private for a reduction using the user-defined reduction
/// \p DRD: run its 'initializer' expression if present (with the UDR's
/// placeholder variables remapped to \p Private / \p Original), otherwise
/// zero-initialize from a null constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The pair is (combiner, initializer); .second is the initializer fn.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    auto *CE = cast<CallExpr>(InitOp);
    auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The init call's arguments are &priv and &orig placeholders; strip the
    // address-of to reach the placeholder variables themselves.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the placeholders to the actual private/original storage while the
    // init expression is emitted.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() -> Address { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() -> Address { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the initializer function and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user initializer: materialize a null constant of Ty in a private
    // global and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, ".init");
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant with the representation matching Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Wrap the value in an opaque expression so EmitAnyExprToMem can store it.
    OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
455 
456 /// \brief Emit initialization of arrays of complex types.
457 /// \param DestAddr Address of the array.
458 /// \param Type Type of array.
459 /// \param Init Initial expression of array.
460 /// \param SrcAddr Address of the original array.
461 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
462                                  QualType Type, const Expr *Init,
463                                  Address SrcAddr = Address::invalid()) {
464   auto *DRD = getReductionInit(Init);
465   // Perform element-by-element initialization.
466   QualType ElementTy;
467 
468   // Drill down to the base element type on both arrays.
469   auto ArrayTy = Type->getAsArrayTypeUnsafe();
470   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
471   DestAddr =
472       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
473   if (DRD)
474     SrcAddr =
475         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
476 
477   llvm::Value *SrcBegin = nullptr;
478   if (DRD)
479     SrcBegin = SrcAddr.getPointer();
480   auto DestBegin = DestAddr.getPointer();
481   // Cast from pointer to array type to pointer to single element.
482   auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
483   // The basic structure here is a while-do loop.
484   auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
485   auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
486   auto IsEmpty =
487       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
488   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
489 
490   // Enter the loop body, making that address the current address.
491   auto EntryBB = CGF.Builder.GetInsertBlock();
492   CGF.EmitBlock(BodyBB);
493 
494   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
495 
496   llvm::PHINode *SrcElementPHI = nullptr;
497   Address SrcElementCurrent = Address::invalid();
498   if (DRD) {
499     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
500                                           "omp.arraycpy.srcElementPast");
501     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
502     SrcElementCurrent =
503         Address(SrcElementPHI,
504                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
505   }
506   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
507       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
508   DestElementPHI->addIncoming(DestBegin, EntryBB);
509   Address DestElementCurrent =
510       Address(DestElementPHI,
511               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
512 
513   // Emit copy.
514   {
515     CodeGenFunction::RunCleanupsScope InitScope(CGF);
516     if (DRD && (DRD->getInitializer() || !Init)) {
517       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
518                                        SrcElementCurrent, ElementTy);
519     } else
520       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
521                            /*IsInitializer=*/false);
522   }
523 
524   if (DRD) {
525     // Shift the address forward by one element.
526     auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
527         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
528     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
529   }
530 
531   // Shift the address forward by one element.
532   auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
533       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
534   // Check whether we've reached the end.
535   auto Done =
536       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
537   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
538   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
539 
540   // Done.
541   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
542 }
543 
544 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
545                                   Address SrcAddr, const VarDecl *DestVD,
546                                   const VarDecl *SrcVD, const Expr *Copy) {
547   if (OriginalType->isArrayType()) {
548     auto *BO = dyn_cast<BinaryOperator>(Copy);
549     if (BO && BO->getOpcode() == BO_Assign) {
550       // Perform simple memcpy for simple copying.
551       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
552     } else {
553       // For arrays with complex element types perform element by element
554       // copying.
555       EmitOMPAggregateAssign(
556           DestAddr, SrcAddr, OriginalType,
557           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
558             // Working with the single array element, so have to remap
559             // destination and source variables to corresponding array
560             // elements.
561             CodeGenFunction::OMPPrivateScope Remap(*this);
562             Remap.addPrivate(DestVD, [DestElement]() -> Address {
563               return DestElement;
564             });
565             Remap.addPrivate(
566                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
567             (void)Remap.Privatize();
568             EmitIgnoredExpr(Copy);
569           });
570     }
571   } else {
572     // Remap pseudo source variable to private copy.
573     CodeGenFunction::OMPPrivateScope Remap(*this);
574     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
575     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
576     (void)Remap.Privatize();
577     // Emit copying of the whole variable.
578     EmitIgnoredExpr(Copy);
579   }
580 }
581 
/// Emit the private copies for every 'firstprivate' variable of \p D,
/// registering them in \p PrivateScope. Returns true if at least one emitted
/// firstprivate variable is also listed in a 'lastprivate' clause (such
/// variables need extra handling by the caller).
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect all variables that also appear in lastprivate clauses.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // Walk the clause's variable list, private copies, and init expressions in
    // lockstep.
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      // If the variable is captured by copy in both the associated statement
      // and the current context (and not also lastprivate), the by-copy
      // capture itself already provides the private copy; skip emission.
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Emit each variable only once even if it appears in several clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Evaluate the original variable's address in the current context.
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  // Only report lastprivate overlap if something was actually emitted.
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
674 
675 void CodeGenFunction::EmitOMPPrivateClause(
676     const OMPExecutableDirective &D,
677     CodeGenFunction::OMPPrivateScope &PrivateScope) {
678   if (!HaveInsertPoint())
679     return;
680   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
681   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
682     auto IRef = C->varlist_begin();
683     for (auto IInit : C->private_copies()) {
684       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
685       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
686         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
687         bool IsRegistered =
688             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
689               // Emit private VarDecl with copy init.
690               EmitDecl(*VD);
691               return GetAddrOfLocalVar(VD);
692             });
693         assert(IsRegistered && "private var already registered as private");
694         // Silence the warning about unused variable.
695         (void)IsRegistered;
696       }
697       ++IRef;
698     }
699   }
700 }
701 
/// Emit the 'copyin' clause: copy the master thread's values of the listed
/// threadprivate variables into the current thread's local instances.
/// \returns true if any copy code was emitted; the caller is then responsible
/// for emitting the synchronizing barrier.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // The emitted code has the following shape:
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      // Copy each variable only once, even if it is listed in several
      // 'copyin' clauses.
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          // Drop the cached mapping so subsequent references to VD below go
          // through the thread-local copy, not the captured master address.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
            Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                        : CGM.GetAddrOfGlobal(VD),
                    getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data. On the master thread the two addresses compare
          // equal, so the entire copy region is skipped.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
767 
/// Register private copies and destination addresses for all variables listed
/// in 'lastprivate' clauses of \p D in \p PrivateScope.
/// \returns true if the directive has at least one 'lastprivate' clause.
/// For taskloop directives only the flag is computed — per the comment below,
/// initialization is done by the runtime support library.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Collect the loop control variables of simd directives; for these no extra
  // private copy is emitted below (see the SIMDLCVs check).
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Map the destination variable to the original variable's address so
        // the final copy-back can find it.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
825 
/// Copy the values of the lastprivate copies back to the original variables,
/// guarded (when \p IsLastIterCond is non-null) by a check that the current
/// iteration was the last one.
/// \param NoFinals if true, loop counters are marked already-emitted (so they
/// are skipped) instead of being updated with their 'final' expressions.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  // For loop-based directives remember the 'final' expression of each loop
  // counter, or mark the counter as already handled when finals are
  // suppressed.
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      // Copy each variable back only once even if it appears in several
      // 'lastprivate' clauses.
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // Look through a reference-typed private copy before copying.
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
895 
/// Cast the element address \p Addr back to the shape of the original base
/// variable whose type \p BaseTy may be a chain of pointers/references ending
/// in the element type \p ElTy.
/// For each level of indirection a temporary is created; each temporary stores
/// the address of the next-deeper one, and \p Addr is stored in the innermost.
/// The outermost temporary is returned, so dereferencing through it yields
/// \p Addr. With no indirection, \p Addr is returned directly with the base
/// lvalue's alignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // Build a chain of temporaries mirroring the pointer/reference levels of
  // BaseTy. Tmp is the most recently created (innermost so far); MostTopTmp
  // remembers the very first (outermost) one.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to the pointee type of the innermost temporary (or to the type
  // of the base pointer when no temporaries were needed).
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the casted address into the innermost temporary and hand back the
    // outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}
922 
923 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
924                           LValue BaseLV) {
925   BaseTy = BaseTy.getNonReferenceType();
926   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
927          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
928     if (auto *PtrTy = BaseTy->getAs<PointerType>())
929       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
930     else {
931       BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
932                                              BaseTy->castAs<ReferenceType>());
933     }
934     BaseTy = BaseTy->getPointeeType();
935   }
936   return CGF.MakeAddrLValue(
937       Address(
938           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
939               BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
940           BaseLV.getAlignment()),
941       BaseLV.getType(), BaseLV.getAlignmentSource());
942 }
943 
/// Register, in \p PrivateScope, the reduction copies and the LHS/RHS helper
/// variables for every variable listed in 'reduction' clauses of \p D.
/// Three shapes of reduction item are handled separately below: OpenMP array
/// sections, array subscript expressions, and plain variables (array-typed or
/// otherwise).
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      // DRD, when non-null, carries the user-defined ('declare reduction')
      // initializer — see getReductionInit earlier in this file.
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        // Reduction over an array section. Find the underlying DeclRefExpr by
        // stripping any nested sections and subscripts.
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              // Number of elements in the section: (UB - LB) + 1.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              // Bind the private copy's VLA size expression to the computed
              // section length before materializing its type.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              // Offset the private copy so that indexing it with the original
              // base yields the start of the section.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // Reduction over a single array element.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                // Initialize with the user-defined reduction initializer.
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        // Reduction over a whole variable.
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              // Bind the private copy's VLA size to the size of the original
              // variable before materializing the type.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Non-array reduction variable.
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  // Initialize with the user-defined reduction initializer.
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}
1146 
1147 void CodeGenFunction::EmitOMPReductionClauseFinal(
1148     const OMPExecutableDirective &D) {
1149   if (!HaveInsertPoint())
1150     return;
1151   llvm::SmallVector<const Expr *, 8> Privates;
1152   llvm::SmallVector<const Expr *, 8> LHSExprs;
1153   llvm::SmallVector<const Expr *, 8> RHSExprs;
1154   llvm::SmallVector<const Expr *, 8> ReductionOps;
1155   bool HasAtLeastOneReduction = false;
1156   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1157     HasAtLeastOneReduction = true;
1158     Privates.append(C->privates().begin(), C->privates().end());
1159     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1160     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1161     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1162   }
1163   if (HasAtLeastOneReduction) {
1164     // Emit nowait reduction if nowait clause is present or directive is a
1165     // parallel directive (it always has implicit barrier).
1166     CGM.getOpenMPRuntime().emitReduction(
1167         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1168         D.getSingleClause<OMPNowaitClause>() ||
1169             isOpenMPParallelDirective(D.getDirectiveKind()) ||
1170             D.getDirectiveKind() == OMPD_simd,
1171         D.getDirectiveKind() == OMPD_simd);
1172   }
1173 }
1174 
1175 static void emitPostUpdateForReductionClause(
1176     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1177     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1178   if (!CGF.HaveInsertPoint())
1179     return;
1180   llvm::BasicBlock *DoneBB = nullptr;
1181   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1182     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1183       if (!DoneBB) {
1184         if (auto *Cond = CondGen(CGF)) {
1185           // If the first post-update expression is found, emit conditional
1186           // block if it was requested.
1187           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1188           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1189           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1190           CGF.EmitBlock(ThenBB);
1191         }
1192       }
1193       CGF.EmitIgnoredExpr(PostUpdate);
1194     }
1195   }
1196   if (DoneBB)
1197     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1198 }
1199 
/// Shared emission path for parallel-like directives: outline \p CodeGen into
/// a separate function, emit the 'num_threads'/'proc_bind' runtime calls,
/// select the 'if' clause condition applying to 'parallel', and finally emit
/// the parallel call with the captured variables.
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           OpenMPDirectiveKind InnermostKind,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
      emitParallelOrTeamsOutlinedFunction(S,
          *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    // Evaluate the thread count inside its own cleanups scope so temporaries
    // created by the expression are destroyed right after the runtime call.
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign*/ true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  // Pick the first 'if' clause that applies to 'parallel': either one with no
  // name modifier or one explicitly naming 'parallel'.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Materialize captured expressions, then emit the call to the outlined
  // function with the captured variables as arguments.
  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}
1235 
1236 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1237   // Emit parallel region as a standalone region.
1238   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1239     OMPPrivateScope PrivateScope(CGF);
1240     bool Copyins = CGF.EmitOMPCopyinClause(S);
1241     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1242     if (Copyins) {
1243       // Emit implicit barrier to synchronize threads and avoid data races on
1244       // propagation master's thread values of threadprivate variables to local
1245       // instances of that variables of all other implicit threads.
1246       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1247           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1248           /*ForceSimpleCall=*/true);
1249     }
1250     CGF.EmitOMPPrivateClause(S, PrivateScope);
1251     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1252     (void)PrivateScope.Privatize();
1253     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1254     CGF.EmitOMPReductionClauseFinal(S);
1255   };
1256   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
1257   emitPostUpdateForReductionClause(
1258       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1259 }
1260 
1261 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1262                                       JumpDest LoopExit) {
1263   RunCleanupsScope BodyScope(*this);
1264   // Update counters values on current iteration.
1265   for (auto I : D.updates()) {
1266     EmitIgnoredExpr(I);
1267   }
1268   // Update the linear variables.
1269   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1270     for (auto *U : C->updates())
1271       EmitIgnoredExpr(U);
1272   }
1273 
1274   // On a continue in the body, jump to the end.
1275   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1276   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1277   // Emit loop body.
1278   EmitStmt(D.getBody());
1279   // The end (updates/cleanups).
1280   EmitBlock(Continue.getBlock());
1281   BreakContinueStack.pop_back();
1282 }
1283 
/// Emit the skeleton of an inner OpenMP loop:
///   cond -> body -> inc -> cond ... -> end
/// \param RequiresCleanup whether cleanups exist between the condition and the
/// loop exit; if so an intermediate cleanup-staging block is emitted.
/// \param BodyGen callback that emits the loop body.
/// \param PostIncGen callback that emits extra code right after the increment.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the staging block so cleanups run first.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment; a 'continue' in the body jumps there.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1330 
/// Emit initializers for the privatized variables of 'linear' clauses of
/// \p D, plus the pre-calculated linear steps where the step is not a
/// compile-time constant.
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init refers to another variable: allocate the private copy and
        // initialize it from that (possibly captured) original variable.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}
1362 
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  // Set lazily: the conditional block is only created once the first 'final'
  // expression is about to be emitted and CondGen actually produced a
  // condition value.
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    // Iterate the clause's variable list in lock-step with its finals().
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Resolve the address of the original (non-private) variable, looking
      // through the captured-statement info if it was captured.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      // Map the variable to its original address so the 'final' expression
      // stores the computed final value into the original variable.
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    // Emit the clause's post-update expression, if any.
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1400 
/// Emit alignment assumptions for the pointers listed in the 'aligned'
/// clauses of directive \p D.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      // The alignment expression must fold to a constant here; otherwise the
      // cast below would assert.
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        // Emit the pointer value and attach an alignment assumption so the
        // vectorizer can rely on it.
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}
1433 
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  // Walk counters() and private_counters() in lock-step: each loop counter
  // has a matching private counter variable.
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Map the original counter to the address of its private copy. The
    // lambda runs lazily at Privatize() time, hence the by-reference capture.
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    // If the original counter is addressable in this function (local,
    // captured, or global), also map the private counter back to the
    // original's address so references to the private decl reach it.
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}
1465 
1466 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1467                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1468                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1469   if (!CGF.HaveInsertPoint())
1470     return;
1471   {
1472     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1473     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1474     (void)PreCondScope.Privatize();
1475     // Get initial values of real counters.
1476     for (auto I : S.inits()) {
1477       CGF.EmitIgnoredExpr(I);
1478     }
1479   }
1480   // Check that loop is executed at least one time.
1481   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1482 }
1483 
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // For simd directives, collect the loop control variables: a linear
  // variable that is also a loop counter is already privatized as a counter
  // and must not be registered in the private scope a second time.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    // Walk the clause's variable list in lock-step with its privates().
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        // Loop control variable: emit the private copy without mapping it.
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}
1517 
1518 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1519                                      const OMPExecutableDirective &D,
1520                                      bool IsMonotonic) {
1521   if (!CGF.HaveInsertPoint())
1522     return;
1523   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1524     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1525                                  /*ignoreResult=*/true);
1526     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1527     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1528     // In presence of finite 'safelen', it may be unsafe to mark all
1529     // the memory instructions parallel, because loop-carried
1530     // dependences of 'safelen' iterations are possible.
1531     if (!IsMonotonic)
1532       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1533   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1534     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1535                                  /*ignoreResult=*/true);
1536     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1537     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1538     // In presence of finite 'safelen', it may be unsafe to mark all
1539     // the memory instructions parallel, because loop-carried
1540     // dependences of 'safelen' iterations are possible.
1541     CGF.LoopStack.setParallel(false);
1542   }
1543 }
1544 
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  // A monotonic schedule prevents marking the loop's accesses as parallel.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}
1552 
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  // Created lazily when the first final update must be emitted and CondGen
  // returns a non-null condition.
  llvm::BasicBlock *DoneBB = nullptr;
  // Walk counters(), private_counters() and finals() in lock-step.
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    // Only emit the final update if the original counter is observable after
    // the loop: a local, a captured variable, a global, or a captured expr.
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Compute the address the final value must be stored into.
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      // Redirect the counter to that address while the 'final' expression is
      // emitted, so the update writes through to the original location.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1598 
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // Generated code layout:
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Mark the loop vectorizable and apply simdlen/safelen clauses.
    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    {
      // Privatize counters and clause variables, then emit the inner loop.
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // Null CondGen: final updates are emitted unconditionally.
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
1680 
void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // Static chunked schedule: the bounds are updated in this function, so
    // the condition is evaluated directly from the loop expressions.
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    // Dynamic/ordered schedule: ask the runtime for the next chunk; the
    // call also updates LB/UB/ST and returns whether a chunk was obtained.
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     // Ordered loops must notify the runtime at the end of
                     // each iteration.
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());

}
1771 
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  // Static non-chunked loops are fully handled without an outer loop (see
  // the callers); reaching here with one is a caller bug.
  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  // Initialize the schedule in the runtime, then emit the shared outer loop.
  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                         Ordered, IL, LB, UB, ST, Chunk);
  }

  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
                   ST, IL, Chunk);
}
1853 
1854 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1855     OpenMPDistScheduleClauseKind ScheduleKind,
1856     const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
1857     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1858 
1859   auto &RT = CGM.getOpenMPRuntime();
1860 
1861   // Emit outer loop.
1862   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1863   // dynamic
1864   //
1865 
1866   const Expr *IVExpr = S.getIterationVariable();
1867   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1868   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1869 
1870   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
1871                               IVSize, IVSigned, /* Ordered = */ false,
1872                               IL, LB, UB, ST, Chunk);
1873 
1874   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
1875                    S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
1876 }
1877 
1878 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
1879     const OMPDistributeParallelForDirective &S) {
1880   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1881   CGM.getOpenMPRuntime().emitInlinedDirective(
1882       *this, OMPD_distribute_parallel_for,
1883       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1884         OMPLoopScope PreInitScope(CGF, S);
1885         CGF.EmitStmt(
1886             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1887       });
1888 }
1889 
1890 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
1891     const OMPDistributeParallelForSimdDirective &S) {
1892   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1893   CGM.getOpenMPRuntime().emitInlinedDirective(
1894       *this, OMPD_distribute_parallel_for_simd,
1895       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1896         OMPLoopScope PreInitScope(CGF, S);
1897         CGF.EmitStmt(
1898             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1899       });
1900 }
1901 
1902 void CodeGenFunction::EmitOMPDistributeSimdDirective(
1903     const OMPDistributeSimdDirective &S) {
1904   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1905   CGM.getOpenMPRuntime().emitInlinedDirective(
1906       *this, OMPD_distribute_simd,
1907       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1908         OMPLoopScope PreInitScope(CGF, S);
1909         CGF.EmitStmt(
1910             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1911       });
1912 }
1913 
1914 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
1915     const OMPTargetParallelForSimdDirective &S) {
1916   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1917   CGM.getOpenMPRuntime().emitInlinedDirective(
1918       *this, OMPD_target_parallel_for_simd,
1919       [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1920         OMPLoopScope PreInitScope(CGF, S);
1921         CGF.EmitStmt(
1922             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1923       });
1924 }
1925 
1926 void CodeGenFunction::EmitOMPTargetSimdDirective(
1927     const OMPTargetSimdDirective &S) {
1928   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1929   CGM.getOpenMPRuntime().emitInlinedDirective(
1930       *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1931         OMPLoopScope PreInitScope(CGF, S);
1932         CGF.EmitStmt(
1933             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1934       });
1935 }
1936 
1937 /// \brief Emit a helper variable and return corresponding lvalue.
1938 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1939                                const DeclRefExpr *Helper) {
1940   auto VDecl = cast<VarDecl>(Helper->getDecl());
1941   CGF.EmitVarDecl(*VDecl);
1942   return CGF.EmitLValue(Helper);
1943 }
1944 
namespace {
  // Bundles a 'schedule' clause kind with its (up to two) modifiers.
  struct ScheduleKindModifiersTy {
    OpenMPScheduleClauseKind Kind; // Schedule kind (static, dynamic, ...).
    OpenMPScheduleClauseModifier M1; // First schedule modifier.
    OpenMPScheduleClauseModifier M2; // Second schedule modifier.
    ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                            OpenMPScheduleClauseModifier M1,
                            OpenMPScheduleClauseModifier M2)
        : Kind(Kind), M1(M1), M2(M2) {}
  };
} // namespace
1956 
1957 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
1958   // Emit the loop iteration variable.
1959   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
1960   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
1961   EmitVarDecl(*IVDecl);
1962 
1963   // Emit the iterations count variable.
1964   // If it is not a variable, Sema decided to calculate iterations count on each
1965   // iteration (e.g., it is foldable into a constant).
1966   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1967     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1968     // Emit calculation of the iterations count.
1969     EmitIgnoredExpr(S.getCalcLastIteration());
1970   }
1971 
1972   auto &RT = CGM.getOpenMPRuntime();
1973 
1974   bool HasLastprivateClause;
1975   // Check pre-condition.
1976   {
1977     OMPLoopScope PreInitScope(*this, S);
1978     // Skip the entire loop if we don't meet the precondition.
1979     // If the condition constant folds and can be elided, avoid emitting the
1980     // whole loop.
1981     bool CondConstant;
1982     llvm::BasicBlock *ContBlock = nullptr;
1983     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1984       if (!CondConstant)
1985         return false;
1986     } else {
1987       auto *ThenBlock = createBasicBlock("omp.precond.then");
1988       ContBlock = createBasicBlock("omp.precond.end");
1989       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
1990                   getProfileCount(&S));
1991       EmitBlock(ThenBlock);
1992       incrementProfileCounter(&S);
1993     }
1994 
1995     bool Ordered = false;
1996     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
1997       if (OrderedClause->getNumForLoops())
1998         RT.emitDoacrossInit(*this, S);
1999       else
2000         Ordered = true;
2001     }
2002 
2003     llvm::DenseSet<const Expr *> EmittedFinals;
2004     emitAlignedClause(*this, S);
2005     EmitOMPLinearClauseInit(S);
2006     // Emit helper vars inits.
2007     LValue LB =
2008         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2009     LValue UB =
2010         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2011     LValue ST =
2012         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2013     LValue IL =
2014         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2015 
2016     // Emit 'then' code.
2017     {
2018       OMPPrivateScope LoopScope(*this);
2019       if (EmitOMPFirstprivateClause(S, LoopScope)) {
2020         // Emit implicit barrier to synchronize threads and avoid data races on
2021         // initialization of firstprivate variables and post-update of
2022         // lastprivate variables.
2023         CGM.getOpenMPRuntime().emitBarrierCall(
2024             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2025             /*ForceSimpleCall=*/true);
2026       }
2027       EmitOMPPrivateClause(S, LoopScope);
2028       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2029       EmitOMPReductionClauseInit(S, LoopScope);
2030       EmitOMPPrivateLoopCounters(S, LoopScope);
2031       EmitOMPLinearClause(S, LoopScope);
2032       (void)LoopScope.Privatize();
2033 
2034       // Detect the loop schedule kind and chunk.
2035       llvm::Value *Chunk = nullptr;
2036       OpenMPScheduleTy ScheduleKind;
2037       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2038         ScheduleKind.Schedule = C->getScheduleKind();
2039         ScheduleKind.M1 = C->getFirstScheduleModifier();
2040         ScheduleKind.M2 = C->getSecondScheduleModifier();
2041         if (const auto *Ch = C->getChunkSize()) {
2042           Chunk = EmitScalarExpr(Ch);
2043           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2044                                        S.getIterationVariable()->getType(),
2045                                        S.getLocStart());
2046         }
2047       }
2048       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2049       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2050       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2051       // If the static schedule kind is specified or if the ordered clause is
2052       // specified, and if no monotonic modifier is specified, the effect will
2053       // be as if the monotonic modifier was specified.
2054       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2055                                 /* Chunked */ Chunk != nullptr) &&
2056           !Ordered) {
2057         if (isOpenMPSimdDirective(S.getDirectiveKind()))
2058           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2059         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2060         // When no chunk_size is specified, the iteration space is divided into
2061         // chunks that are approximately equal in size, and at most one chunk is
2062         // distributed to each thread. Note that the size of the chunks is
2063         // unspecified in this case.
2064         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
2065                              IVSize, IVSigned, Ordered,
2066                              IL.getAddress(), LB.getAddress(),
2067                              UB.getAddress(), ST.getAddress());
2068         auto LoopExit =
2069             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2070         // UB = min(UB, GlobalUB);
2071         EmitIgnoredExpr(S.getEnsureUpperBound());
2072         // IV = LB;
2073         EmitIgnoredExpr(S.getInit());
2074         // while (idx <= UB) { BODY; ++idx; }
2075         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2076                          S.getInc(),
2077                          [&S, LoopExit](CodeGenFunction &CGF) {
2078                            CGF.EmitOMPLoopBody(S, LoopExit);
2079                            CGF.EmitStopPoint(&S);
2080                          },
2081                          [](CodeGenFunction &) {});
2082         EmitBlock(LoopExit.getBlock());
2083         // Tell the runtime we are done.
2084         RT.emitForStaticFinish(*this, S.getLocStart());
2085       } else {
2086         const bool IsMonotonic =
2087             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2088             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2089             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2090             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2091         // Emit the outer loop, which requests its work chunk [LB..UB] from
2092         // runtime and runs the inner loop to process it.
2093         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2094                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
2095                             IL.getAddress(), Chunk);
2096       }
2097       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2098         EmitOMPSimdFinal(S,
2099                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2100                            return CGF.Builder.CreateIsNotNull(
2101                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2102                          });
2103       }
2104       EmitOMPReductionClauseFinal(S);
2105       // Emit post-update of the reduction variables if IsLastIter != 0.
2106       emitPostUpdateForReductionClause(
2107           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2108             return CGF.Builder.CreateIsNotNull(
2109                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2110           });
2111       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2112       if (HasLastprivateClause)
2113         EmitOMPLastprivateClauseFinal(
2114             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2115             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2116     }
2117     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2118       return CGF.Builder.CreateIsNotNull(
2119           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2120     });
2121     // We're now done with the loop, so jump to the continuation block.
2122     if (ContBlock) {
2123       EmitBranch(ContBlock);
2124       EmitBlock(ContBlock, true);
2125     }
2126   }
2127   return HasLastprivateClause;
2128 }
2129 
2130 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2131   bool HasLastprivates = false;
2132   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2133                                           PrePostActionTy &) {
2134     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
2135   };
2136   {
2137     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2138     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2139                                                 S.hasCancel());
2140   }
2141 
2142   // Emit an implicit barrier at the end.
2143   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2144     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2145   }
2146 }
2147 
2148 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2149   bool HasLastprivates = false;
2150   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2151                                           PrePostActionTy &) {
2152     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
2153   };
2154   {
2155     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2156     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2157   }
2158 
2159   // Emit an implicit barrier at the end.
2160   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2161     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2162   }
2163 }
2164 
2165 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2166                                 const Twine &Name,
2167                                 llvm::Value *Init = nullptr) {
2168   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2169   if (Init)
2170     CGF.EmitScalarInit(Init, LVal);
2171   return LVal;
2172 }
2173 
/// Emit code for a '(parallel) sections' region. The sections are lowered to
/// a statically-scheduled worksharing loop whose induction variable selects
/// exactly one section per iteration through a switch statement.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  // The captured statement is either a CompoundStmt whose children are the
  // individual sections, or a single statement forming the only section.
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    // The runtime's static-for interface works on kmp_int32 bounds.
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    // Lower bound of the sections "loop" starts at 0.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound is number-of-sections minus one; a non-compound
    // associated statement means exactly one section, hence 0.
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    // Stride and is-last-iteration flag for the runtime.
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Synthesize AST nodes (opaque references to IV/UB plus compare and
    // increment operators) so the generic inner-loop emitter can be reused.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One switch case per section statement.
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single-section form: one case covering the whole statement.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    // Privatize clause variables before running the loop.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Propagate 'cancel' information from either flavor of sections directive.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2308 
2309 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2310   {
2311     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2312     EmitSections(S);
2313   }
2314   // Emit an implicit barrier at the end.
2315   if (!S.getSingleClause<OMPNowaitClause>()) {
2316     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2317                                            OMPD_sections);
2318   }
2319 }
2320 
2321 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2322   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2323     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2324   };
2325   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2326   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2327                                               S.hasCancel());
2328 }
2329 
2330 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2331   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2332   llvm::SmallVector<const Expr *, 8> DestExprs;
2333   llvm::SmallVector<const Expr *, 8> SrcExprs;
2334   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2335   // Check if there are any 'copyprivate' clauses associated with this
2336   // 'single' construct.
2337   // Build a list of copyprivate variables along with helper expressions
2338   // (<source>, <destination>, <destination>=<source> expressions)
2339   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2340     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2341     DestExprs.append(C->destination_exprs().begin(),
2342                      C->destination_exprs().end());
2343     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2344     AssignmentOps.append(C->assignment_ops().begin(),
2345                          C->assignment_ops().end());
2346   }
2347   // Emit code for 'single' region along with 'copyprivate' clauses
2348   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2349     Action.Enter(CGF);
2350     OMPPrivateScope SingleScope(CGF);
2351     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2352     CGF.EmitOMPPrivateClause(S, SingleScope);
2353     (void)SingleScope.Privatize();
2354     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2355   };
2356   {
2357     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2358     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2359                                             CopyprivateVars, DestExprs,
2360                                             SrcExprs, AssignmentOps);
2361   }
2362   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2363   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2364   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2365     CGM.getOpenMPRuntime().emitBarrierCall(
2366         *this, S.getLocStart(),
2367         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2368   }
2369 }
2370 
2371 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2372   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2373     Action.Enter(CGF);
2374     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2375   };
2376   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2377   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2378 }
2379 
2380 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2381   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2382     Action.Enter(CGF);
2383     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2384   };
2385   Expr *Hint = nullptr;
2386   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2387     Hint = HintClause->getHint();
2388   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2389   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2390                                             S.getDirectiveName().getAsString(),
2391                                             CodeGen, S.getLocStart(), Hint);
2392 }
2393 
2394 void CodeGenFunction::EmitOMPParallelForDirective(
2395     const OMPParallelForDirective &S) {
2396   // Emit directive as a combined directive that consists of two implicit
2397   // directives: 'parallel' with 'for' directive.
2398   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2399     CGF.EmitOMPWorksharingLoop(S);
2400   };
2401   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
2402 }
2403 
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
}
2413 
2414 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2415     const OMPParallelSectionsDirective &S) {
2416   // Emit directive as a combined directive that consists of two implicit
2417   // directives: 'parallel' with 'sections' directive.
2418   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2419     CGF.EmitSections(S);
2420   };
2421   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
2422 }
2423 
/// Common lowering for task-generating constructs: collects clause data into
/// \p Data, emits the outlined task function whose body is produced by
/// \p BodyGen, and finally invokes \p TaskGen to emit the runtime call that
/// creates/schedules the task.
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  // Captured-decl parameters used below: [0] thread id, [1] part id,
  // [2] privates block, [3] privates-copy function, [4] task descriptor.
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    // The runtime expects the priority as a signed 32-bit value.
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Record each variable only once even if listed in several clauses.
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      // Remember destination-decl -> original-ref mapping for the final copy.
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  // Body of the outlined task function: wire up private copies, then emit
  // the user body via BodyGen.
  auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      // Param 3 is the function that maps the privates block to the
      // individual private copies; param 2 is the privates block itself.
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      // For each private variable, allocate a pointer slot the copy function
      // will fill with the address of the task-local copy.
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      // Lastprivate destination decls resolve to the original (shared)
      // variables, referenced through the captured environment.
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      // All other privates resolve to the addresses the copy function filled.
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
2575 
2576 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2577   // Emit outlined function for task construct.
2578   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2579   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2580   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2581   const Expr *IfCond = nullptr;
2582   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2583     if (C->getNameModifier() == OMPD_unknown ||
2584         C->getNameModifier() == OMPD_task) {
2585       IfCond = C->getCondition();
2586       break;
2587     }
2588   }
2589 
2590   OMPTaskDataTy Data;
2591   // Check if we should emit tied or untied task.
2592   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2593   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2594     CGF.EmitStmt(CS->getCapturedStmt());
2595   };
2596   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2597                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2598                             const OMPTaskDataTy &Data) {
2599     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2600                                             SharedsTy, CapturedStruct, IfCond,
2601                                             Data);
2602   };
2603   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2604 }
2605 
2606 void CodeGenFunction::EmitOMPTaskyieldDirective(
2607     const OMPTaskyieldDirective &S) {
2608   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2609 }
2610 
2611 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2612   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2613 }
2614 
2615 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2616   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2617 }
2618 
2619 void CodeGenFunction::EmitOMPTaskgroupDirective(
2620     const OMPTaskgroupDirective &S) {
2621   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2622     Action.Enter(CGF);
2623     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2624   };
2625   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2626   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2627 }
2628 
2629 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2630   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2631     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2632       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2633                                 FlushClause->varlist_end());
2634     }
2635     return llvm::None;
2636   }(), S.getLocStart());
2637 }
2638 
/// Emit the loop part of a 'distribute' construct: iterations are divided
/// into chunks that the runtime assigns to the teams of the league.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      // Precondition is not a constant: branch on it at runtime.
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      // LB/UB/ST/IL are the bound, stride, and is-last-iteration variables
      // passed to the runtime's static-init interface.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      // Privatize the loop counters for the region body.
      OMPPrivateScope LoopScope(*this);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // Convert the chunk expression to the iteration variable's type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // Static non-chunked: each team gets one contiguous chunk, so a
        // single init/inner-loop/finish sequence suffices.
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, /* Ordered = */ false,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
2754 
2755 void CodeGenFunction::EmitOMPDistributeDirective(
2756     const OMPDistributeDirective &S) {
2757   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2758     CGF.EmitOMPDistributeLoop(S);
2759   };
2760   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2761   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2762                                               false);
2763 }
2764 
2765 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
2766                                                    const CapturedStmt *S) {
2767   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
2768   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
2769   CGF.CapturedStmtInfo = &CapStmtInfo;
2770   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
2771   Fn->addFnAttr(llvm::Attribute::NoInline);
2772   return Fn;
2773 }
2774 
// Emit code for '#pragma omp ordered'. Handles three forms: a stand-alone
// 'ordered depend(...)', 'ordered simd', and the plain block form.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  // Stand-alone 'ordered depend(...)' has no associated statement; only emit
  // the doacross synchronization for each depend clause.
  if (!S.getAssociatedStmt()) {
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      // 'ordered simd': outline the body into a noinline function and call it
      // with the captured variables so the body stays intact as one unit.
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
    } else {
      // Plain 'ordered': emit the body inline inside the runtime region.
      Action.Enter(CGF);
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  // !C: with a simd clause the region is not a "threads" ordered region —
  // presumably suppressing the runtime enter/exit calls; confirm against
  // CGOpenMPRuntime::emitOrderedRegion.
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
2799 
2800 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
2801                                          QualType SrcType, QualType DestType,
2802                                          SourceLocation Loc) {
2803   assert(CGF.hasScalarEvaluationKind(DestType) &&
2804          "DestType must have scalar evaluation kind.");
2805   assert(!Val.isAggregate() && "Must be a scalar or complex.");
2806   return Val.isScalar()
2807              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
2808                                         Loc)
2809              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
2810                                                  DestType, Loc);
2811 }
2812 
2813 static CodeGenFunction::ComplexPairTy
2814 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
2815                       QualType DestType, SourceLocation Loc) {
2816   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
2817          "DestType must have complex evaluation kind.");
2818   CodeGenFunction::ComplexPairTy ComplexVal;
2819   if (Val.isScalar()) {
2820     // Convert the input element to the element type of the complex.
2821     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2822     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
2823                                               DestElementType, Loc);
2824     ComplexVal = CodeGenFunction::ComplexPairTy(
2825         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
2826   } else {
2827     assert(Val.isComplex() && "Must be a scalar or complex.");
2828     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
2829     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2830     ComplexVal.first = CGF.EmitScalarConversion(
2831         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
2832     ComplexVal.second = CGF.EmitScalarConversion(
2833         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
2834   }
2835   return ComplexVal;
2836 }
2837 
2838 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
2839                                   LValue LVal, RValue RVal) {
2840   if (LVal.isGlobalReg()) {
2841     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
2842   } else {
2843     CGF.EmitAtomicStore(RVal, LVal,
2844                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2845                                  : llvm::AtomicOrdering::Monotonic,
2846                         LVal.isVolatile(), /*IsInit=*/false);
2847   }
2848 }
2849 
// Store \p RVal (whose source type is \p RValTy) into \p LVal, converting the
// value to the destination's evaluation kind first. Used for the non-atomic
// 'v = ...' stores of atomic read/capture; aggregates are not allowed.
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
2867 
// Emit '#pragma omp atomic read': atomically load 'x' and store the
// (possibly converted) value into 'v'. Only the load of 'x' is atomic.
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  // Global register lvalues cannot be loaded atomically; fall back to a
  // plain load for them.
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}
2891 
2892 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
2893                                    const Expr *X, const Expr *E,
2894                                    SourceLocation Loc) {
2895   // x = expr;
2896   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
2897   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
2898   // OpenMP, 2.12.6, atomic Construct
2899   // Any atomic construct with a seq_cst clause forces the atomically
2900   // performed operation to include an implicit flush operation without a
2901   // list.
2902   if (IsSeqCst)
2903     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2904 }
2905 
// Try to lower an atomic update of 'X' by 'Update' with operator 'BO' to a
// single 'atomicrmw' instruction. Returns {true, old value of X} on success,
// or {false, null RValue} when the operation cannot be expressed as an
// atomicrmw and the caller must fall back to a compare-and-swap loop.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // 'expr - x' cannot be mapped to an atomicrmw sub; only 'x - expr' can.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  // For < and >, the position of 'x' in the conditional expression determines
  // whether the update is a min or a max; signedness selects the (U)Min/(U)Max
  // flavor.
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    // Plain assignment maps to an atomic exchange (old value is returned).
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // Operations with no atomicrmw counterpart: fall back to CAS loop.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // Operators that can never appear as the update operation of an atomic
  // construct (sema rewrites compound assignments before we get here).
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  // Constant updates may have a different width than 'x'; cast them to the
  // element type of 'x' (sign-extending for signed types).
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
2997 
// Emit an atomic update of lvalue 'X' with value 'E' via operator 'BO'.
// First tries a single atomicrmw (emitOMPAtomicRMW); if that is not possible,
// falls back to 'CommonGen': for global register lvalues a plain
// load/compute/store, otherwise a compare-and-swap update loop.
// Returns {true, old value of X} only when the atomicrmw path was taken.
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}
3021 
// Emit '#pragma omp atomic update' (also the default form with no clause):
// 'x' is updated with the result of the binary update expression 'UE'.
// 'IsXLHSInRHSPart' tells which operand of 'UE' stands for the old value of
// 'x' ('x binop expr' vs. 'expr binop x').
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  // Sema wraps the operands of 'UE' in OpaqueValueExprs; figure out which one
  // is the old value of 'x' and which is 'expr'.
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Generator used by the CAS fallback: bind the opaque values to the already
  // evaluated 'expr' and the current value of 'x', then evaluate 'UE'.
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3059 
3060 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3061                             QualType SourceType, QualType ResType,
3062                             SourceLocation Loc) {
3063   switch (CGF.getEvaluationKind(ResType)) {
3064   case TEK_Scalar:
3065     return RValue::get(
3066         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3067   case TEK_Complex: {
3068     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3069     return RValue::getComplex(Res.first, Res.second);
3070   }
3071   case TEK_Aggregate:
3072     break;
3073   }
3074   llvm_unreachable("Must be a scalar or complex.");
3075 }
3076 
// Emit '#pragma omp atomic capture': atomically update (or overwrite) 'x' and
// capture either its old value (postfix form) or its new value into 'v'.
// 'UE' is the update expression, or null when 'x' is simply assigned 'expr'.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value to be stored into 'v' after the atomic operation; filled in by one
  // of the paths below.
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // CAS-loop generator: also records the captured value (old 'x' for the
    // postfix form, the update result otherwise) into NewVVal.
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
3157 
// Dispatch an atomic construct to the proper emitter based on the
// determining clause ('read', 'write', 'update'/none, or 'capture').
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    // '#pragma omp atomic' with no form clause behaves as 'update'.
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  // All remaining clause kinds can never be the determining clause of an
  // atomic construct; listing them keeps the switch exhaustive so new clause
  // kinds trigger a compiler warning here.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
3225 
// Emit code for '#pragma omp atomic' and its clause variants.
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  // Non-null clause pointer converts to true: seq_cst present?
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  // The first non-seq_cst clause (read/write/update/capture), if any,
  // determines the statement form; OMPC_unknown means plain 'atomic'.
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Enter cleanup scopes for full-expressions up front so temporaries created
  // inside the atomic region are destroyed correctly.
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
3261 
// Outline the body of a '#pragma omp target' directive into a device
// function. Returns the outlined function and, when \p IsOffloadEntry, the
// ID constant used to register/launch it through the offload runtime.
std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
    CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
    bool IsOffloadEntry) {
  llvm::Function *OutlinedFn = nullptr;
  llvm::Constant *OutlinedFnID = nullptr;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Privatize firstprivate/private variables before emitting the region
    // body.
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    (void)PrivateScope.Privatize();

    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
  return std::make_pair(OutlinedFn, OutlinedFnID);
}
3282 
// Emit code for '#pragma omp target': outline the region for the device and
// emit the host-side call that dispatches to it (or to the host fallback).
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());

  // Values captured by the region; passed to the target runtime call.
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  GenerateOpenMPCapturedVars(CS, CapturedVars);

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;

  if (auto *C = S.getSingleClause<OMPIfClause>()) {
    IfCond = C->getCondition();
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));

  std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
      CGM, S, ParentName, IsOffloadEntry);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}
3335 
3336 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3337                                         const OMPExecutableDirective &S,
3338                                         OpenMPDirectiveKind InnermostKind,
3339                                         const RegionCodeGenTy &CodeGen) {
3340   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3341   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
3342       emitParallelOrTeamsOutlinedFunction(S,
3343           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3344 
3345   const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
3346   const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
3347   const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
3348   if (NT || TL) {
3349     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3350     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3351 
3352     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3353                                                   S.getLocStart());
3354   }
3355 
3356   OMPLexicalScope Scope(CGF, S);
3357   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3358   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3359   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3360                                            CapturedVars);
3361 }
3362 
3363 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3364   // Emit parallel region as a standalone region.
3365   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3366     OMPPrivateScope PrivateScope(CGF);
3367     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3368     CGF.EmitOMPPrivateClause(S, PrivateScope);
3369     (void)PrivateScope.Privatize();
3370     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3371   };
3372   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3373 }
3374 
3375 void CodeGenFunction::EmitOMPCancellationPointDirective(
3376     const OMPCancellationPointDirective &S) {
3377   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3378                                                    S.getCancelRegion());
3379 }
3380 
3381 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3382   const Expr *IfCond = nullptr;
3383   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3384     if (C->getNameModifier() == OMPD_unknown ||
3385         C->getNameModifier() == OMPD_cancel) {
3386       IfCond = C->getCondition();
3387       break;
3388     }
3389   }
3390   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3391                                         S.getCancelRegion());
3392 }
3393 
3394 CodeGenFunction::JumpDest
3395 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3396   if (Kind == OMPD_parallel || Kind == OMPD_task)
3397     return ReturnBlock;
3398   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3399          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
3400   return BreakContinueStack.back().BreakBlock;
3401 }
3402 
// Privatize the variables of a 'use_device_ptr' clause: each original
// variable is replaced by a private copy initialized with the device address
// obtained from the runtime (\p CaptureDeviceAddrMap).
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  // The three clause lists (originals, inits, private copies) run in
  // parallel; OrigVarIt/InitIt are advanced in lock-step at the loop bottom.
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable reached its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
3464 
// Generate the instructions for '#pragma omp target data' directive.
// The region body is emitted inline; when device pointers are captured, a
// pre-action toggles privatization of the 'use_device_ptr' list items.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emit the statement associated with the directive.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
3554 
3555 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
3556     const OMPTargetEnterDataDirective &S) {
3557   // If we don't have target devices, don't bother emitting the data mapping
3558   // code.
3559   if (CGM.getLangOpts().OMPTargetTriples.empty())
3560     return;
3561 
3562   // Check if we have any if clause associated with the directive.
3563   const Expr *IfCond = nullptr;
3564   if (auto *C = S.getSingleClause<OMPIfClause>())
3565     IfCond = C->getCondition();
3566 
3567   // Check if we have any device clause associated with the directive.
3568   const Expr *Device = nullptr;
3569   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3570     Device = C->getDevice();
3571 
3572   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3573 }
3574 
3575 void CodeGenFunction::EmitOMPTargetExitDataDirective(
3576     const OMPTargetExitDataDirective &S) {
3577   // If we don't have target devices, don't bother emitting the data mapping
3578   // code.
3579   if (CGM.getLangOpts().OMPTargetTriples.empty())
3580     return;
3581 
3582   // Check if we have any if clause associated with the directive.
3583   const Expr *IfCond = nullptr;
3584   if (auto *C = S.getSingleClause<OMPIfClause>())
3585     IfCond = C->getCondition();
3586 
3587   // Check if we have any device clause associated with the directive.
3588   const Expr *Device = nullptr;
3589   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3590     Device = C->getDevice();
3591 
3592   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3593 }
3594 
void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  // Codegen for '#pragma omp target parallel' is not implemented yet; the
  // directive is currently ignored and emits no code.
  // TODO: codegen for target parallel.
}
3599 
void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // Codegen for '#pragma omp target parallel for' is not implemented yet; the
  // directive is currently ignored and emits no code.
  // TODO: codegen for target parallel for.
}
3604 
3605 /// Emit a helper variable and return corresponding lvalue.
3606 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
3607                      const ImplicitParamDecl *PVD,
3608                      CodeGenFunction::OMPPrivateScope &Privates) {
3609   auto *VDecl = cast<VarDecl>(Helper->getDecl());
3610   Privates.addPrivate(
3611       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
3612 }
3613 
/// Shared emission for 'taskloop'-family directives: builds the loop body
/// generator and the task-launch generator and hands both to
/// EmitOMPTaskBasedDirective.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Find the 'if' condition applying to 'taskloop': either an unmodified 'if'
  // clause or one with the 'taskloop' name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop: Schedule's int flag distinguishes num_tasks
  // (true) from grainsize (false); its pointer holds the evaluated expression.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit simd-specific loop metadata/init for 'taskloop simd'.
    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits. The captured function's parameters at indices
    // 5..8 carry the lower bound, upper bound, stride and is-last-iteration
    // helper variables; map each loop helper variable onto its parameter.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Wrap the runtime taskloop call in an inlined 'taskloop' directive region.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}
3741 
/// Emit code for '#pragma omp taskloop'; shares the task-based loop emission
/// path with 'taskloop simd'.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
3745 
/// Emit code for '#pragma omp taskloop simd'; shares the task-based loop
/// emission path with plain 'taskloop'.
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
3750 
3751 // Generate the instructions for '#pragma omp target update' directive.
3752 void CodeGenFunction::EmitOMPTargetUpdateDirective(
3753     const OMPTargetUpdateDirective &S) {
3754   // If we don't have target devices, don't bother emitting the data mapping
3755   // code.
3756   if (CGM.getLangOpts().OMPTargetTriples.empty())
3757     return;
3758 
3759   // Check if we have any if clause associated with the directive.
3760   const Expr *IfCond = nullptr;
3761   if (auto *C = S.getSingleClause<OMPIfClause>())
3762     IfCond = C->getCondition();
3763 
3764   // Check if we have any device clause associated with the directive.
3765   const Expr *Device = nullptr;
3766   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3767     Device = C->getDevice();
3768 
3769   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3770 }
3771