1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47 
48 public:
49   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
50       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()) {
51     emitPreInitStmt(CGF, S);
52   }
53 };
54 
55 /// Private scope for OpenMP loop-based directives, that supports capturing
56 /// of used expression from loop statement.
57 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
58   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
59     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
60       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
61         for (const auto *I : PreInits->decls())
62           CGF.EmitVarDecl(cast<VarDecl>(*I));
63       }
64     }
65   }
66 
67 public:
68   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
69       : CodeGenFunction::RunCleanupsScope(CGF) {
70     emitPreInitStmt(CGF, S);
71   }
72 };
73 
74 } // namespace
75 
76 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
77   auto &C = getContext();
78   llvm::Value *Size = nullptr;
79   auto SizeInChars = C.getTypeSizeInChars(Ty);
80   if (SizeInChars.isZero()) {
81     // getTypeSizeInChars() returns 0 for a VLA.
82     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
83       llvm::Value *ArraySize;
84       std::tie(ArraySize, Ty) = getVLASize(VAT);
85       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
86     }
87     SizeInChars = C.getTypeSizeInChars(Ty);
88     if (SizeInChars.isZero())
89       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
90     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
91   } else
92     Size = CGM.getSize(SizeInChars);
93   return Size;
94 }
95 
96 void CodeGenFunction::GenerateOpenMPCapturedVars(
97     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
98   const RecordDecl *RD = S.getCapturedRecordDecl();
99   auto CurField = RD->field_begin();
100   auto CurCap = S.captures().begin();
101   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
102                                                  E = S.capture_init_end();
103        I != E; ++I, ++CurField, ++CurCap) {
104     if (CurField->hasCapturedVLAType()) {
105       auto VAT = CurField->getCapturedVLAType();
106       auto *Val = VLASizeMap[VAT->getSizeExpr()];
107       CapturedVars.push_back(Val);
108     } else if (CurCap->capturesThis())
109       CapturedVars.push_back(CXXThisValue);
110     else if (CurCap->capturesVariableByCopy())
111       CapturedVars.push_back(
112           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
113     else {
114       assert(CurCap->capturesVariable() && "Expected capture by reference.");
115       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
116     }
117   }
118 }
119 
120 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
121                                     StringRef Name, LValue AddrLV,
122                                     bool isReferenceType = false) {
123   ASTContext &Ctx = CGF.getContext();
124 
125   auto *CastedPtr = CGF.EmitScalarConversion(
126       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
127       Ctx.getPointerType(DstType), SourceLocation());
128   auto TmpAddr =
129       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
130           .getAddress();
131 
132   // If we are dealing with references we need to return the address of the
133   // reference instead of the reference of the value.
134   if (isReferenceType) {
135     QualType RefType = Ctx.getLValueReferenceType(DstType);
136     auto *RefVal = TmpAddr.getPointer();
137     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
138     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
139     CGF.EmitScalarInit(RefVal, TmpLVal);
140   }
141 
142   return TmpAddr;
143 }
144 
145 llvm::Function *
146 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
147   assert(
148       CapturedStmtInfo &&
149       "CapturedStmtInfo should be set when generating the captured function");
150   const CapturedDecl *CD = S.getCapturedDecl();
151   const RecordDecl *RD = S.getCapturedRecordDecl();
152   assert(CD->hasBody() && "missing CapturedDecl body");
153 
154   // Build the argument list.
155   ASTContext &Ctx = CGM.getContext();
156   FunctionArgList Args;
157   Args.append(CD->param_begin(),
158               std::next(CD->param_begin(), CD->getContextParamPosition()));
159   auto I = S.captures().begin();
160   for (auto *FD : RD->fields()) {
161     QualType ArgType = FD->getType();
162     IdentifierInfo *II = nullptr;
163     VarDecl *CapVar = nullptr;
164 
165     // If this is a capture by copy and the type is not a pointer, the outlined
166     // function argument type should be uintptr and the value properly casted to
167     // uintptr. This is necessary given that the runtime library is only able to
168     // deal with pointers. We can pass in the same way the VLA type sizes to the
169     // outlined function.
170     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
171         I->capturesVariableArrayType())
172       ArgType = Ctx.getUIntPtrType();
173 
174     if (I->capturesVariable() || I->capturesVariableByCopy()) {
175       CapVar = I->getCapturedVar();
176       II = CapVar->getIdentifier();
177     } else if (I->capturesThis())
178       II = &getContext().Idents.get("this");
179     else {
180       assert(I->capturesVariableArrayType());
181       II = &getContext().Idents.get("vla");
182     }
183     if (ArgType->isVariablyModifiedType())
184       ArgType = getContext().getVariableArrayDecayedType(ArgType);
185     Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
186                                              FD->getLocation(), II, ArgType));
187     ++I;
188   }
189   Args.append(
190       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
191       CD->param_end());
192 
193   // Create the function declaration.
194   FunctionType::ExtInfo ExtInfo;
195   const CGFunctionInfo &FuncInfo =
196       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
197   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
198 
199   llvm::Function *F = llvm::Function::Create(
200       FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
201       CapturedStmtInfo->getHelperName(), &CGM.getModule());
202   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
203   if (CD->isNothrow())
204     F->addFnAttr(llvm::Attribute::NoUnwind);
205 
206   // Generate the function.
207   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
208                 CD->getBody()->getLocStart());
209   unsigned Cnt = CD->getContextParamPosition();
210   I = S.captures().begin();
211   for (auto *FD : RD->fields()) {
212     // If we are capturing a pointer by copy we don't need to do anything, just
213     // use the value that we get from the arguments.
214     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
215       setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
216       ++Cnt;
217       ++I;
218       continue;
219     }
220 
221     LValue ArgLVal =
222         MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
223                        AlignmentSource::Decl);
224     if (FD->hasCapturedVLAType()) {
225       LValue CastedArgLVal =
226           MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
227                                               Args[Cnt]->getName(), ArgLVal),
228                          FD->getType(), AlignmentSource::Decl);
229       auto *ExprArg =
230           EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
231       auto VAT = FD->getCapturedVLAType();
232       VLASizeMap[VAT->getSizeExpr()] = ExprArg;
233     } else if (I->capturesVariable()) {
234       auto *Var = I->getCapturedVar();
235       QualType VarTy = Var->getType();
236       Address ArgAddr = ArgLVal.getAddress();
237       if (!VarTy->isReferenceType()) {
238         ArgAddr = EmitLoadOfReference(
239             ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
240       }
241       setAddrOfLocalVar(
242           Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
243     } else if (I->capturesVariableByCopy()) {
244       assert(!FD->getType()->isAnyPointerType() &&
245              "Not expecting a captured pointer.");
246       auto *Var = I->getCapturedVar();
247       QualType VarTy = Var->getType();
248       setAddrOfLocalVar(I->getCapturedVar(),
249                         castValueFromUintptr(*this, FD->getType(),
250                                              Args[Cnt]->getName(), ArgLVal,
251                                              VarTy->isReferenceType()));
252     } else {
253       // If 'this' is captured, load it into CXXThisValue.
254       assert(I->capturesThis());
255       CXXThisValue =
256           EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
257     }
258     ++Cnt;
259     ++I;
260   }
261 
262   PGO.assignRegionCounters(GlobalDecl(CD), F);
263   CapturedStmtInfo->EmitBody(*this, CD->getBody());
264   FinishFunction(CD->getBodyRBrace());
265 
266   return F;
267 }
268 
269 //===----------------------------------------------------------------------===//
270 //                              OpenMP Directive Emission
271 //===----------------------------------------------------------------------===//
272 void CodeGenFunction::EmitOMPAggregateAssign(
273     Address DestAddr, Address SrcAddr, QualType OriginalType,
274     const llvm::function_ref<void(Address, Address)> &CopyGen) {
275   // Perform element-by-element initialization.
276   QualType ElementTy;
277 
278   // Drill down to the base element type on both arrays.
279   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
280   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
281   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
282 
283   auto SrcBegin = SrcAddr.getPointer();
284   auto DestBegin = DestAddr.getPointer();
285   // Cast from pointer to array type to pointer to single element.
286   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
287   // The basic structure here is a while-do loop.
288   auto BodyBB = createBasicBlock("omp.arraycpy.body");
289   auto DoneBB = createBasicBlock("omp.arraycpy.done");
290   auto IsEmpty =
291       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
292   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
293 
294   // Enter the loop body, making that address the current address.
295   auto EntryBB = Builder.GetInsertBlock();
296   EmitBlock(BodyBB);
297 
298   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
299 
300   llvm::PHINode *SrcElementPHI =
301     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
302   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
303   Address SrcElementCurrent =
304       Address(SrcElementPHI,
305               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
306 
307   llvm::PHINode *DestElementPHI =
308     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
309   DestElementPHI->addIncoming(DestBegin, EntryBB);
310   Address DestElementCurrent =
311     Address(DestElementPHI,
312             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
313 
314   // Emit copy.
315   CopyGen(DestElementCurrent, SrcElementCurrent);
316 
317   // Shift the address forward by one element.
318   auto DestElementNext = Builder.CreateConstGEP1_32(
319       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
320   auto SrcElementNext = Builder.CreateConstGEP1_32(
321       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
322   // Check whether we've reached the end.
323   auto Done =
324       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
325   Builder.CreateCondBr(Done, DoneBB, BodyBB);
326   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
327   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
328 
329   // Done.
330   EmitBlock(DoneBB, /*IsFinished=*/true);
331 }
332 
333 /// Check if the combiner is a call to UDR combiner and if it is so return the
334 /// UDR decl used for reduction.
335 static const OMPDeclareReductionDecl *
336 getReductionInit(const Expr *ReductionOp) {
337   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
338     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
339       if (auto *DRE =
340               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
341         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
342           return DRD;
343   return nullptr;
344 }
345 
346 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
347                                              const OMPDeclareReductionDecl *DRD,
348                                              const Expr *InitOp,
349                                              Address Private, Address Original,
350                                              QualType Ty) {
351   if (DRD->getInitializer()) {
352     std::pair<llvm::Function *, llvm::Function *> Reduction =
353         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
354     auto *CE = cast<CallExpr>(InitOp);
355     auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
356     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
357     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
358     auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
359     auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
360     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
361     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
362                             [=]() -> Address { return Private; });
363     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
364                             [=]() -> Address { return Original; });
365     (void)PrivateScope.Privatize();
366     RValue Func = RValue::get(Reduction.second);
367     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
368     CGF.EmitIgnoredExpr(InitOp);
369   } else {
370     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
371     auto *GV = new llvm::GlobalVariable(
372         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
373         llvm::GlobalValue::PrivateLinkage, Init, ".init");
374     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
375     RValue InitRVal;
376     switch (CGF.getEvaluationKind(Ty)) {
377     case TEK_Scalar:
378       InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
379       break;
380     case TEK_Complex:
381       InitRVal =
382           RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
383       break;
384     case TEK_Aggregate:
385       InitRVal = RValue::getAggregate(LV.getAddress());
386       break;
387     }
388     OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
389     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
390     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
391                          /*IsInitializer=*/false);
392   }
393 }
394 
395 /// \brief Emit initialization of arrays of complex types.
396 /// \param DestAddr Address of the array.
397 /// \param Type Type of array.
398 /// \param Init Initial expression of array.
399 /// \param SrcAddr Address of the original array.
400 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
401                                  QualType Type, const Expr *Init,
402                                  Address SrcAddr = Address::invalid()) {
403   auto *DRD = getReductionInit(Init);
404   // Perform element-by-element initialization.
405   QualType ElementTy;
406 
407   // Drill down to the base element type on both arrays.
408   auto ArrayTy = Type->getAsArrayTypeUnsafe();
409   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
410   DestAddr =
411       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
412   if (DRD)
413     SrcAddr =
414         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
415 
416   llvm::Value *SrcBegin = nullptr;
417   if (DRD)
418     SrcBegin = SrcAddr.getPointer();
419   auto DestBegin = DestAddr.getPointer();
420   // Cast from pointer to array type to pointer to single element.
421   auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
422   // The basic structure here is a while-do loop.
423   auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
424   auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
425   auto IsEmpty =
426       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
427   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
428 
429   // Enter the loop body, making that address the current address.
430   auto EntryBB = CGF.Builder.GetInsertBlock();
431   CGF.EmitBlock(BodyBB);
432 
433   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
434 
435   llvm::PHINode *SrcElementPHI = nullptr;
436   Address SrcElementCurrent = Address::invalid();
437   if (DRD) {
438     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
439                                           "omp.arraycpy.srcElementPast");
440     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
441     SrcElementCurrent =
442         Address(SrcElementPHI,
443                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
444   }
445   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
446       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
447   DestElementPHI->addIncoming(DestBegin, EntryBB);
448   Address DestElementCurrent =
449       Address(DestElementPHI,
450               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
451 
452   // Emit copy.
453   {
454     CodeGenFunction::RunCleanupsScope InitScope(CGF);
455     if (DRD) {
456       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
457                                        SrcElementCurrent, ElementTy);
458     } else
459       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
460                            /*IsInitializer=*/false);
461   }
462 
463   if (DRD) {
464     // Shift the address forward by one element.
465     auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
466         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
467     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
468   }
469 
470   // Shift the address forward by one element.
471   auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
472       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
473   // Check whether we've reached the end.
474   auto Done =
475       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
476   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
477   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
478 
479   // Done.
480   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
481 }
482 
483 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
484                                   Address SrcAddr, const VarDecl *DestVD,
485                                   const VarDecl *SrcVD, const Expr *Copy) {
486   if (OriginalType->isArrayType()) {
487     auto *BO = dyn_cast<BinaryOperator>(Copy);
488     if (BO && BO->getOpcode() == BO_Assign) {
489       // Perform simple memcpy for simple copying.
490       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
491     } else {
492       // For arrays with complex element types perform element by element
493       // copying.
494       EmitOMPAggregateAssign(
495           DestAddr, SrcAddr, OriginalType,
496           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
497             // Working with the single array element, so have to remap
498             // destination and source variables to corresponding array
499             // elements.
500             CodeGenFunction::OMPPrivateScope Remap(*this);
501             Remap.addPrivate(DestVD, [DestElement]() -> Address {
502               return DestElement;
503             });
504             Remap.addPrivate(
505                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
506             (void)Remap.Privatize();
507             EmitIgnoredExpr(Copy);
508           });
509     }
510   } else {
511     // Remap pseudo source variable to private copy.
512     CodeGenFunction::OMPPrivateScope Remap(*this);
513     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
514     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
515     (void)Remap.Privatize();
516     // Emit copying of the whole variable.
517     EmitIgnoredExpr(Copy);
518   }
519 }
520 
521 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
522                                                 OMPPrivateScope &PrivateScope) {
523   if (!HaveInsertPoint())
524     return false;
525   bool FirstprivateIsLastprivate = false;
526   llvm::DenseSet<const VarDecl *> Lastprivates;
527   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
528     for (const auto *D : C->varlists())
529       Lastprivates.insert(
530           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
531   }
532   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
533   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
534     auto IRef = C->varlist_begin();
535     auto InitsRef = C->inits().begin();
536     for (auto IInit : C->private_copies()) {
537       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
538       FirstprivateIsLastprivate =
539           FirstprivateIsLastprivate ||
540           (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0);
541       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
542         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
543         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
544         bool IsRegistered;
545         DeclRefExpr DRE(
546             const_cast<VarDecl *>(OrigVD),
547             /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
548                 OrigVD) != nullptr,
549             (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
550         Address OriginalAddr = EmitLValue(&DRE).getAddress();
551         QualType Type = OrigVD->getType();
552         if (Type->isArrayType()) {
553           // Emit VarDecl with copy init for arrays.
554           // Get the address of the original variable captured in current
555           // captured region.
556           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
557             auto Emission = EmitAutoVarAlloca(*VD);
558             auto *Init = VD->getInit();
559             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
560               // Perform simple memcpy.
561               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
562                                   Type);
563             } else {
564               EmitOMPAggregateAssign(
565                   Emission.getAllocatedAddress(), OriginalAddr, Type,
566                   [this, VDInit, Init](Address DestElement,
567                                        Address SrcElement) {
568                     // Clean up any temporaries needed by the initialization.
569                     RunCleanupsScope InitScope(*this);
570                     // Emit initialization for single element.
571                     setAddrOfLocalVar(VDInit, SrcElement);
572                     EmitAnyExprToMem(Init, DestElement,
573                                      Init->getType().getQualifiers(),
574                                      /*IsInitializer*/ false);
575                     LocalDeclMap.erase(VDInit);
576                   });
577             }
578             EmitAutoVarCleanups(Emission);
579             return Emission.getAllocatedAddress();
580           });
581         } else {
582           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
583             // Emit private VarDecl with copy init.
584             // Remap temp VDInit variable to the address of the original
585             // variable
586             // (for proper handling of captured global variables).
587             setAddrOfLocalVar(VDInit, OriginalAddr);
588             EmitDecl(*VD);
589             LocalDeclMap.erase(VDInit);
590             return GetAddrOfLocalVar(VD);
591           });
592         }
593         assert(IsRegistered &&
594                "firstprivate var already registered as private");
595         // Silence the warning about unused variable.
596         (void)IsRegistered;
597       }
598       ++IRef;
599       ++InitsRef;
600     }
601   }
602   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
603 }
604 
605 void CodeGenFunction::EmitOMPPrivateClause(
606     const OMPExecutableDirective &D,
607     CodeGenFunction::OMPPrivateScope &PrivateScope) {
608   if (!HaveInsertPoint())
609     return;
610   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
611   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
612     auto IRef = C->varlist_begin();
613     for (auto IInit : C->private_copies()) {
614       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
615       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
616         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
617         bool IsRegistered =
618             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
619               // Emit private VarDecl with copy init.
620               EmitDecl(*VD);
621               return GetAddrOfLocalVar(VD);
622             });
623         assert(IsRegistered && "private var already registered as private");
624         // Silence the warning about unused variable.
625         (void)IsRegistered;
626       }
627       ++IRef;
628     }
629   }
630 }
631 
632 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
633   if (!HaveInsertPoint())
634     return false;
635   // threadprivate_var1 = master_threadprivate_var1;
636   // operator=(threadprivate_var2, master_threadprivate_var2);
637   // ...
638   // __kmpc_barrier(&loc, global_tid);
639   llvm::DenseSet<const VarDecl *> CopiedVars;
640   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
641   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
642     auto IRef = C->varlist_begin();
643     auto ISrcRef = C->source_exprs().begin();
644     auto IDestRef = C->destination_exprs().begin();
645     for (auto *AssignOp : C->assignment_ops()) {
646       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
647       QualType Type = VD->getType();
648       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
649         // Get the address of the master variable. If we are emitting code with
650         // TLS support, the address is passed from the master as field in the
651         // captured declaration.
652         Address MasterAddr = Address::invalid();
653         if (getLangOpts().OpenMPUseTLS &&
654             getContext().getTargetInfo().isTLSSupported()) {
655           assert(CapturedStmtInfo->lookup(VD) &&
656                  "Copyin threadprivates should have been captured!");
657           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
658                           VK_LValue, (*IRef)->getExprLoc());
659           MasterAddr = EmitLValue(&DRE).getAddress();
660           LocalDeclMap.erase(VD);
661         } else {
662           MasterAddr =
663             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
664                                         : CGM.GetAddrOfGlobal(VD),
665                     getContext().getDeclAlign(VD));
666         }
667         // Get the address of the threadprivate variable.
668         Address PrivateAddr = EmitLValue(*IRef).getAddress();
669         if (CopiedVars.size() == 1) {
670           // At first check if current thread is a master thread. If it is, no
671           // need to copy data.
672           CopyBegin = createBasicBlock("copyin.not.master");
673           CopyEnd = createBasicBlock("copyin.not.master.end");
674           Builder.CreateCondBr(
675               Builder.CreateICmpNE(
676                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
677                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
678               CopyBegin, CopyEnd);
679           EmitBlock(CopyBegin);
680         }
681         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
682         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
683         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
684       }
685       ++IRef;
686       ++ISrcRef;
687       ++IDestRef;
688     }
689   }
690   if (CopyEnd) {
691     // Exit out of copying procedure for non-master thread.
692     EmitBlock(CopyEnd, /*IsFinished=*/true);
693     return true;
694   }
695   return false;
696 }
697 
698 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
699     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
700   if (!HaveInsertPoint())
701     return false;
702   bool HasAtLeastOneLastprivate = false;
703   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
704   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
705     HasAtLeastOneLastprivate = true;
706     auto IRef = C->varlist_begin();
707     auto IDestRef = C->destination_exprs().begin();
708     for (auto *IInit : C->private_copies()) {
709       // Keep the address of the original variable for future update at the end
710       // of the loop.
711       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
712       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
713         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
714         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
715           DeclRefExpr DRE(
716               const_cast<VarDecl *>(OrigVD),
717               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
718                   OrigVD) != nullptr,
719               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
720           return EmitLValue(&DRE).getAddress();
721         });
722         // Check if the variable is also a firstprivate: in this case IInit is
723         // not generated. Initialization of this variable will happen in codegen
724         // for 'firstprivate' clause.
725         if (IInit) {
726           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
727           bool IsRegistered =
728               PrivateScope.addPrivate(OrigVD, [&]() -> Address {
729                 // Emit private VarDecl with copy init.
730                 EmitDecl(*VD);
731                 return GetAddrOfLocalVar(VD);
732               });
733           assert(IsRegistered &&
734                  "lastprivate var already registered as private");
735           (void)IsRegistered;
736         }
737       }
738       ++IRef;
739       ++IDestRef;
740     }
741   }
742   return HasAtLeastOneLastprivate;
743 }
744 
745 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
746     const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
747   if (!HaveInsertPoint())
748     return;
749   // Emit following code:
750   // if (<IsLastIterCond>) {
751   //   orig_var1 = private_orig_var1;
752   //   ...
753   //   orig_varn = private_orig_varn;
754   // }
755   llvm::BasicBlock *ThenBB = nullptr;
756   llvm::BasicBlock *DoneBB = nullptr;
757   if (IsLastIterCond) {
758     ThenBB = createBasicBlock(".omp.lastprivate.then");
759     DoneBB = createBasicBlock(".omp.lastprivate.done");
760     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
761     EmitBlock(ThenBB);
762   }
763   llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
764   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
765     auto IC = LoopDirective->counters().begin();
766     for (auto F : LoopDirective->finals()) {
767       auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl();
768       LoopCountersAndUpdates[D] = F;
769       ++IC;
770     }
771   }
772   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
773   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
774     auto IRef = C->varlist_begin();
775     auto ISrcRef = C->source_exprs().begin();
776     auto IDestRef = C->destination_exprs().begin();
777     for (auto *AssignOp : C->assignment_ops()) {
778       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
779       QualType Type = PrivateVD->getType();
780       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
781       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
782         // If lastprivate variable is a loop control variable for loop-based
783         // directive, update its value before copyin back to original
784         // variable.
785         if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
786           EmitIgnoredExpr(UpExpr);
787         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
788         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
789         // Get the address of the original variable.
790         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
791         // Get the address of the private variable.
792         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
793         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
794           PrivateAddr =
795               Address(Builder.CreateLoad(PrivateAddr),
796                       getNaturalTypeAlignment(RefTy->getPointeeType()));
797         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
798       }
799       ++IRef;
800       ++ISrcRef;
801       ++IDestRef;
802     }
803     if (auto *PostUpdate = C->getPostUpdateExpr())
804       EmitIgnoredExpr(PostUpdate);
805   }
806   if (IsLastIterCond)
807     EmitBlock(DoneBB, /*IsFinished=*/true);
808 }
809 
810 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
811                           LValue BaseLV, llvm::Value *Addr) {
812   Address Tmp = Address::invalid();
813   Address TopTmp = Address::invalid();
814   Address MostTopTmp = Address::invalid();
815   BaseTy = BaseTy.getNonReferenceType();
816   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
817          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
818     Tmp = CGF.CreateMemTemp(BaseTy);
819     if (TopTmp.isValid())
820       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
821     else
822       MostTopTmp = Tmp;
823     TopTmp = Tmp;
824     BaseTy = BaseTy->getPointeeType();
825   }
826   llvm::Type *Ty = BaseLV.getPointer()->getType();
827   if (Tmp.isValid())
828     Ty = Tmp.getElementType();
829   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
830   if (Tmp.isValid()) {
831     CGF.Builder.CreateStore(Addr, Tmp);
832     return MostTopTmp;
833   }
834   return Address(Addr, BaseLV.getAlignment());
835 }
836 
837 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
838                           LValue BaseLV) {
839   BaseTy = BaseTy.getNonReferenceType();
840   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
841          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
842     if (auto *PtrTy = BaseTy->getAs<PointerType>())
843       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
844     else {
845       BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
846                                              BaseTy->castAs<ReferenceType>());
847     }
848     BaseTy = BaseTy->getPointeeType();
849   }
850   return CGF.MakeAddrLValue(
851       Address(
852           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
853               BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
854           BaseLV.getAlignment()),
855       BaseLV.getType(), BaseLV.getAlignmentSource());
856 }
857 
/// Emits the private copies for the list items of all 'reduction' clauses of
/// \a D and registers them in \a PrivateScope. Does nothing if there is no
/// insertion point.
///
/// For every list item three entries are added to \a PrivateScope:
///  - the LHS helper variable, mapped to the address of the original item;
///  - the original variable, mapped to the newly emitted private copy;
///  - the RHS helper variable, mapped to the private copy.
/// A list item may be an array section, an array subscript or a plain
/// variable (of array or non-array type); each form has its own branch below.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Helper expression lists that are walked in lock-step with the list
    // items of the clause.
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      // NOTE(review): getReductionInit is defined elsewhere in this file; a
      // non-null result presumably denotes a user-defined reduction that
      // supplies its own initializer - confirm.
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        // Array section: strip nested sections and subscripts to reach the
        // reference to the underlying variable.
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        // Lvalues of the first (lower bound) and last (upper bound) elements
        // of the section.
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              // Number of elements in the section: (UB - LB) + 1.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              // Bind the opaque size expression of the private variable's
              // variable array type to the computed element count while the
              // type and the storage are emitted.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              // Initialize the private elements, using the reduction op when
              // DRD is set and the private variable's initializer otherwise.
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Shift the private buffer by (base - section start), i.e. the
              // same distance that separates the original base from the
              // section's first element, and wrap the result so that it has
              // the shape of the original base.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        // The RHS helper variable refers to the private copy.
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // Array element: strip nested subscripts to reach the reference to
        // the underlying variable.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              if (DRD) {
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              // Shift the private element by (base - element), the distance
              // between the original base and the subscripted element, and
              // wrap the result so that it has the shape of the original base.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        // The RHS helper variable refers to the private copy, viewed with the
        // memory type of the RHS helper.
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        // Plain variable reference.
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          // OriginalAddr is captured by reference: the cast address written
          // here is the one later passed to EmitOMPAggregateInit below.
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              // Bind the opaque size expression of the private variable's
              // variable array type to the size of the original variable's
              // type before emitting the type.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            // Initialize the private elements, using the reduction op when
            // DRD is set and the private variable's initializer otherwise.
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          // The RHS helper variable refers to the private copy, viewed with
          // the memory type of the RHS helper.
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          Address OriginalAddr = Address::invalid();
          // The generator writes the address into OriginalAddr by reference.
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          // NOTE(review): OriginalAddr is captured by value here, so this
          // relies on the previous addPrivate call having already run its
          // generator - confirm against OMPPrivateScope::addPrivate.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                if (DRD) {
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          // The RHS helper variable refers to the private copy.
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      // Advance the helper iterators together with the list item.
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}
1060 
1061 void CodeGenFunction::EmitOMPReductionClauseFinal(
1062     const OMPExecutableDirective &D) {
1063   if (!HaveInsertPoint())
1064     return;
1065   llvm::SmallVector<const Expr *, 8> Privates;
1066   llvm::SmallVector<const Expr *, 8> LHSExprs;
1067   llvm::SmallVector<const Expr *, 8> RHSExprs;
1068   llvm::SmallVector<const Expr *, 8> ReductionOps;
1069   bool HasAtLeastOneReduction = false;
1070   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1071     HasAtLeastOneReduction = true;
1072     Privates.append(C->privates().begin(), C->privates().end());
1073     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1074     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1075     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1076   }
1077   if (HasAtLeastOneReduction) {
1078     // Emit nowait reduction if nowait clause is present or directive is a
1079     // parallel directive (it always has implicit barrier).
1080     CGM.getOpenMPRuntime().emitReduction(
1081         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1082         D.getSingleClause<OMPNowaitClause>() ||
1083             isOpenMPParallelDirective(D.getDirectiveKind()) ||
1084             D.getDirectiveKind() == OMPD_simd,
1085         D.getDirectiveKind() == OMPD_simd);
1086   }
1087 }
1088 
1089 static void emitPostUpdateForReductionClause(
1090     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1091     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1092   if (!CGF.HaveInsertPoint())
1093     return;
1094   llvm::BasicBlock *DoneBB = nullptr;
1095   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1096     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1097       if (!DoneBB) {
1098         if (auto *Cond = CondGen(CGF)) {
1099           // If the first post-update expression is found, emit conditional
1100           // block if it was requested.
1101           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1102           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1103           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1104           CGF.EmitBlock(ThenBB);
1105         }
1106       }
1107       CGF.EmitIgnoredExpr(PostUpdate);
1108     }
1109   }
1110   if (DoneBB)
1111     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1112 }
1113 
1114 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
1115                                            const OMPExecutableDirective &S,
1116                                            OpenMPDirectiveKind InnermostKind,
1117                                            const RegionCodeGenTy &CodeGen) {
1118   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
1119   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
1120       emitParallelOrTeamsOutlinedFunction(S,
1121           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1122   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1123     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1124     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1125                                          /*IgnoreResultAssign*/ true);
1126     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1127         CGF, NumThreads, NumThreadsClause->getLocStart());
1128   }
1129   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1130     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1131     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1132         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1133   }
1134   const Expr *IfCond = nullptr;
1135   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1136     if (C->getNameModifier() == OMPD_unknown ||
1137         C->getNameModifier() == OMPD_parallel) {
1138       IfCond = C->getCondition();
1139       break;
1140     }
1141   }
1142 
1143   OMPLexicalScope Scope(CGF, S);
1144   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1145   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1146   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1147                                               CapturedVars, IfCond);
1148 }
1149 
1150 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1151   // Emit parallel region as a standalone region.
1152   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1153     OMPPrivateScope PrivateScope(CGF);
1154     bool Copyins = CGF.EmitOMPCopyinClause(S);
1155     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1156     if (Copyins) {
1157       // Emit implicit barrier to synchronize threads and avoid data races on
1158       // propagation master's thread values of threadprivate variables to local
1159       // instances of that variables of all other implicit threads.
1160       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1161           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1162           /*ForceSimpleCall=*/true);
1163     }
1164     CGF.EmitOMPPrivateClause(S, PrivateScope);
1165     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1166     (void)PrivateScope.Privatize();
1167     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1168     CGF.EmitOMPReductionClauseFinal(S);
1169   };
1170   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
1171   emitPostUpdateForReductionClause(
1172       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1173 }
1174 
1175 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1176                                       JumpDest LoopExit) {
1177   RunCleanupsScope BodyScope(*this);
1178   // Update counters values on current iteration.
1179   for (auto I : D.updates()) {
1180     EmitIgnoredExpr(I);
1181   }
1182   // Update the linear variables.
1183   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1184     for (auto U : C->updates()) {
1185       EmitIgnoredExpr(U);
1186     }
1187   }
1188 
1189   // On a continue in the body, jump to the end.
1190   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1191   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1192   // Emit loop body.
1193   EmitStmt(D.getBody());
1194   // The end (updates/cleanups).
1195   EmitBlock(Continue.getBlock());
1196   BreakContinueStack.pop_back();
1197 }
1198 
1199 void CodeGenFunction::EmitOMPInnerLoop(
1200     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1201     const Expr *IncExpr,
1202     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1203     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1204   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1205 
1206   // Start the loop with a block that tests the condition.
1207   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1208   EmitBlock(CondBlock);
1209   LoopStack.push(CondBlock);
1210 
1211   // If there are any cleanups between here and the loop-exit scope,
1212   // create a block to stage a loop exit along.
1213   auto ExitBlock = LoopExit.getBlock();
1214   if (RequiresCleanup)
1215     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1216 
1217   auto LoopBody = createBasicBlock("omp.inner.for.body");
1218 
1219   // Emit condition.
1220   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1221   if (ExitBlock != LoopExit.getBlock()) {
1222     EmitBlock(ExitBlock);
1223     EmitBranchThroughCleanup(LoopExit);
1224   }
1225 
1226   EmitBlock(LoopBody);
1227   incrementProfileCounter(&S);
1228 
1229   // Create a block for the increment.
1230   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1231   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1232 
1233   BodyGen(*this);
1234 
1235   // Emit "IV = IV + 1" and a back-edge to the condition block.
1236   EmitBlock(Continue.getBlock());
1237   EmitIgnoredExpr(IncExpr);
1238   PostIncGen(*this);
1239   BreakContinueStack.pop_back();
1240   EmitBranch(CondBlock);
1241   LoopStack.pop();
1242   // Emit the fall-through block.
1243   EmitBlock(LoopExit.getBlock());
1244 }
1245 
1246 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1247   if (!HaveInsertPoint())
1248     return;
1249   // Emit inits for the linear variables.
1250   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1251     for (auto Init : C->inits()) {
1252       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1253       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1254         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1255         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1256         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1257                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1258                         VD->getInit()->getType(), VK_LValue,
1259                         VD->getInit()->getExprLoc());
1260         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1261                                                 VD->getType()),
1262                        /*capturedByInit=*/false);
1263         EmitAutoVarCleanups(Emission);
1264       } else
1265         EmitVarDecl(*VD);
1266     }
1267     // Emit the linear steps for the linear clauses.
1268     // If a step is not constant, it is pre-calculated before the loop.
1269     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1270       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1271         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1272         // Emit calculation of the linear step.
1273         EmitIgnoredExpr(CS);
1274       }
1275   }
1276 }
1277 
1278 static void emitLinearClauseFinal(
1279     CodeGenFunction &CGF, const OMPLoopDirective &D,
1280     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1281   if (!CGF.HaveInsertPoint())
1282     return;
1283   llvm::BasicBlock *DoneBB = nullptr;
1284   // Emit the final values of the linear variables.
1285   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1286     auto IC = C->varlist_begin();
1287     for (auto F : C->finals()) {
1288       if (!DoneBB) {
1289         if (auto *Cond = CondGen(CGF)) {
1290           // If the first post-update expression is found, emit conditional
1291           // block if it was requested.
1292           auto *ThenBB = CGF.createBasicBlock(".omp.linear.pu");
1293           DoneBB = CGF.createBasicBlock(".omp.linear.pu.done");
1294           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1295           CGF.EmitBlock(ThenBB);
1296         }
1297       }
1298       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1299       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1300                       CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
1301                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1302       Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
1303       CodeGenFunction::OMPPrivateScope VarScope(CGF);
1304       VarScope.addPrivate(OrigVD,
1305                           [OrigAddr]() -> Address { return OrigAddr; });
1306       (void)VarScope.Privatize();
1307       CGF.EmitIgnoredExpr(F);
1308       ++IC;
1309     }
1310     if (auto *PostUpdate = C->getPostUpdateExpr())
1311       CGF.EmitIgnoredExpr(PostUpdate);
1312   }
1313   if (DoneBB)
1314     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1315 }
1316 
1317 static void emitAlignedClause(CodeGenFunction &CGF,
1318                               const OMPExecutableDirective &D) {
1319   if (!CGF.HaveInsertPoint())
1320     return;
1321   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1322     unsigned ClauseAlignment = 0;
1323     if (auto AlignmentExpr = Clause->getAlignment()) {
1324       auto AlignmentCI =
1325           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1326       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1327     }
1328     for (auto E : Clause->varlists()) {
1329       unsigned Alignment = ClauseAlignment;
1330       if (Alignment == 0) {
1331         // OpenMP [2.8.1, Description]
1332         // If no optional parameter is specified, implementation-defined default
1333         // alignments for SIMD instructions on the target platforms are assumed.
1334         Alignment =
1335             CGF.getContext()
1336                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1337                     E->getType()->getPointeeType()))
1338                 .getQuantity();
1339       }
1340       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1341              "alignment is not power of 2");
1342       if (Alignment != 0) {
1343         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1344         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1345       }
1346     }
1347   }
1348 }
1349 
1350 static void emitPrivateLoopCounters(CodeGenFunction &CGF,
1351                                     CodeGenFunction::OMPPrivateScope &LoopScope,
1352                                     ArrayRef<Expr *> Counters,
1353                                     ArrayRef<Expr *> PrivateCounters) {
1354   if (!CGF.HaveInsertPoint())
1355     return;
1356   auto I = PrivateCounters.begin();
1357   for (auto *E : Counters) {
1358     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1359     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1360     Address Addr = Address::invalid();
1361     (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1362       // Emit var without initialization.
1363       auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
1364       CGF.EmitAutoVarCleanups(VarEmission);
1365       Addr = VarEmission.getAllocatedAddress();
1366       return Addr;
1367     });
1368     (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
1369     ++I;
1370   }
1371 }
1372 
1373 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1374                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1375                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1376   if (!CGF.HaveInsertPoint())
1377     return;
1378   {
1379     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1380     emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
1381                             S.private_counters());
1382     (void)PreCondScope.Privatize();
1383     // Get initial values of real counters.
1384     for (auto I : S.inits()) {
1385       CGF.EmitIgnoredExpr(I);
1386     }
1387   }
1388   // Check that loop is executed at least one time.
1389   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1390 }
1391 
1392 static void
1393 emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
1394                       CodeGenFunction::OMPPrivateScope &PrivateScope) {
1395   if (!CGF.HaveInsertPoint())
1396     return;
1397   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1398     auto CurPrivate = C->privates().begin();
1399     for (auto *E : C->varlists()) {
1400       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1401       auto *PrivateVD =
1402           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1403       bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1404         // Emit private VarDecl with copy init.
1405         CGF.EmitVarDecl(*PrivateVD);
1406         return CGF.GetAddrOfLocalVar(PrivateVD);
1407       });
1408       assert(IsRegistered && "linear var already registered as private");
1409       // Silence the warning about unused variable.
1410       (void)IsRegistered;
1411       ++CurPrivate;
1412     }
1413   }
1414 }
1415 
1416 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1417                                      const OMPExecutableDirective &D,
1418                                      bool IsMonotonic) {
1419   if (!CGF.HaveInsertPoint())
1420     return;
1421   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1422     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1423                                  /*ignoreResult=*/true);
1424     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1425     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1426     // In presence of finite 'safelen', it may be unsafe to mark all
1427     // the memory instructions parallel, because loop-carried
1428     // dependences of 'safelen' iterations are possible.
1429     if (!IsMonotonic)
1430       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1431   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1432     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1433                                  /*ignoreResult=*/true);
1434     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1435     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1436     // In presence of finite 'safelen', it may be unsafe to mark all
1437     // the memory instructions parallel, because loop-carried
1438     // dependences of 'safelen' iterations are possible.
1439     CGF.LoopStack.setParallel(false);
1440   }
1441 }
1442 
1443 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1444                                       bool IsMonotonic) {
1445   // Walk clauses and process safelen/lastprivate.
1446   LoopStack.setParallel(!IsMonotonic);
1447   LoopStack.setVectorizeEnable(true);
1448   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1449 }
1450 
1451 void CodeGenFunction::EmitOMPSimdFinal(
1452     const OMPLoopDirective &D,
1453     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1454   if (!HaveInsertPoint())
1455     return;
1456   llvm::BasicBlock *DoneBB = nullptr;
1457   auto IC = D.counters().begin();
1458   for (auto F : D.finals()) {
1459     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1460     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
1461       if (!DoneBB) {
1462         if (auto *Cond = CondGen(*this)) {
1463           // If the first post-update expression is found, emit conditional
1464           // block if it was requested.
1465           auto *ThenBB = createBasicBlock(".omp.final.then");
1466           DoneBB = createBasicBlock(".omp.final.done");
1467           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1468           EmitBlock(ThenBB);
1469         }
1470       }
1471       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1472                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1473                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1474       Address OrigAddr = EmitLValue(&DRE).getAddress();
1475       OMPPrivateScope VarScope(*this);
1476       VarScope.addPrivate(OrigVD,
1477                           [OrigAddr]() -> Address { return OrigAddr; });
1478       (void)VarScope.Privatize();
1479       EmitIgnoredExpr(F);
1480     }
1481     ++IC;
1482   }
1483   if (DoneBB)
1484     EmitBlock(DoneBB, /*IsFinished=*/true);
1485 }
1486 
1487 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1488   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1489     OMPLoopScope PreInitScope(CGF, S);
1490     // if (PreCond) {
1491     //   for (IV in 0..LastIteration) BODY;
1492     //   <Final counter/linear vars updates>;
1493     // }
1494     //
1495 
1496     // Emit: if (PreCond) - begin.
1497     // If the condition constant folds and can be elided, avoid emitting the
1498     // whole loop.
1499     bool CondConstant;
1500     llvm::BasicBlock *ContBlock = nullptr;
1501     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1502       if (!CondConstant)
1503         return;
1504     } else {
1505       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1506       ContBlock = CGF.createBasicBlock("simd.if.end");
1507       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1508                   CGF.getProfileCount(&S));
1509       CGF.EmitBlock(ThenBlock);
1510       CGF.incrementProfileCounter(&S);
1511     }
1512 
1513     // Emit the loop iteration variable.
1514     const Expr *IVExpr = S.getIterationVariable();
1515     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1516     CGF.EmitVarDecl(*IVDecl);
1517     CGF.EmitIgnoredExpr(S.getInit());
1518 
1519     // Emit the iterations count variable.
1520     // If it is not a variable, Sema decided to calculate iterations count on
1521     // each iteration (e.g., it is foldable into a constant).
1522     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1523       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1524       // Emit calculation of the iterations count.
1525       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1526     }
1527 
1528     CGF.EmitOMPSimdInit(S);
1529 
1530     emitAlignedClause(CGF, S);
1531     CGF.EmitOMPLinearClauseInit(S);
1532     {
1533       OMPPrivateScope LoopScope(CGF);
1534       emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
1535                               S.private_counters());
1536       emitPrivateLinearVars(CGF, S, LoopScope);
1537       CGF.EmitOMPPrivateClause(S, LoopScope);
1538       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1539       bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1540       (void)LoopScope.Privatize();
1541       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1542                            S.getInc(),
1543                            [&S](CodeGenFunction &CGF) {
1544                              CGF.EmitOMPLoopBody(S, JumpDest());
1545                              CGF.EmitStopPoint(&S);
1546                            },
1547                            [](CodeGenFunction &) {});
1548       // Emit final copy of the lastprivate variables at the end of loops.
1549       if (HasLastprivateClause)
1550         CGF.EmitOMPLastprivateClauseFinal(S);
1551       CGF.EmitOMPReductionClauseFinal(S);
1552       emitPostUpdateForReductionClause(
1553           CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1554     }
1555     CGF.EmitOMPSimdFinal(
1556         S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1557     emitLinearClauseFinal(
1558         CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1559     // Emit: if (PreCond) - end.
1560     if (ContBlock) {
1561       CGF.EmitBranch(ContBlock);
1562       CGF.EmitBlock(ContBlock, true);
1563     }
1564   };
1565   OMPLexicalScope Scope(*this, S);
1566   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1567 }
1568 
1569 void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
1570     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1571     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1572   auto &RT = CGM.getOpenMPRuntime();
1573 
1574   const Expr *IVExpr = S.getIterationVariable();
1575   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1576   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1577 
1578   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1579 
1580   // Start the loop with a block that tests the condition.
1581   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1582   EmitBlock(CondBlock);
1583   LoopStack.push(CondBlock);
1584 
1585   llvm::Value *BoolCondVal = nullptr;
1586   if (!DynamicOrOrdered) {
1587     // UB = min(UB, GlobalUB)
1588     EmitIgnoredExpr(S.getEnsureUpperBound());
1589     // IV = LB
1590     EmitIgnoredExpr(S.getInit());
1591     // IV < UB
1592     BoolCondVal = EvaluateExprAsBool(S.getCond());
1593   } else {
1594     BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
1595                                     IL, LB, UB, ST);
1596   }
1597 
1598   // If there are any cleanups between here and the loop-exit scope,
1599   // create a block to stage a loop exit along.
1600   auto ExitBlock = LoopExit.getBlock();
1601   if (LoopScope.requiresCleanups())
1602     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1603 
1604   auto LoopBody = createBasicBlock("omp.dispatch.body");
1605   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1606   if (ExitBlock != LoopExit.getBlock()) {
1607     EmitBlock(ExitBlock);
1608     EmitBranchThroughCleanup(LoopExit);
1609   }
1610   EmitBlock(LoopBody);
1611 
1612   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1613   // LB for loop condition and emitted it above).
1614   if (DynamicOrOrdered)
1615     EmitIgnoredExpr(S.getInit());
1616 
1617   // Create a block for the increment.
1618   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1619   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1620 
1621   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1622   // with dynamic/guided scheduling and without ordered clause.
1623   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1624     LoopStack.setParallel(!IsMonotonic);
1625   else
1626     EmitOMPSimdInit(S, IsMonotonic);
1627 
1628   SourceLocation Loc = S.getLocStart();
1629   EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
1630                    [&S, LoopExit](CodeGenFunction &CGF) {
1631                      CGF.EmitOMPLoopBody(S, LoopExit);
1632                      CGF.EmitStopPoint(&S);
1633                    },
1634                    [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
1635                      if (Ordered) {
1636                        CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
1637                            CGF, Loc, IVSize, IVSigned);
1638                      }
1639                    });
1640 
1641   EmitBlock(Continue.getBlock());
1642   BreakContinueStack.pop_back();
1643   if (!DynamicOrOrdered) {
1644     // Emit "LB = LB + Stride", "UB = UB + Stride".
1645     EmitIgnoredExpr(S.getNextLowerBound());
1646     EmitIgnoredExpr(S.getNextUpperBound());
1647   }
1648 
1649   EmitBranch(CondBlock);
1650   LoopStack.pop();
1651   // Emit the fall-through block.
1652   EmitBlock(LoopExit.getBlock());
1653 
1654   // Tell the runtime we are done.
1655   if (!DynamicOrOrdered)
1656     RT.emitForStaticFinish(*this, S.getLocEnd());
1657 
1658 }
1659 
1660 void CodeGenFunction::EmitOMPForOuterLoop(
1661     OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic,
1662     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1663     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1664   auto &RT = CGM.getOpenMPRuntime();
1665 
1666   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1667   const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
1668 
1669   assert((Ordered ||
1670           !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
1671          "static non-chunked schedule does not need outer loop");
1672 
1673   // Emit outer loop.
1674   //
1675   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1676   // When schedule(dynamic,chunk_size) is specified, the iterations are
1677   // distributed to threads in the team in chunks as the threads request them.
1678   // Each thread executes a chunk of iterations, then requests another chunk,
1679   // until no chunks remain to be distributed. Each chunk contains chunk_size
1680   // iterations, except for the last chunk to be distributed, which may have
1681   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1682   //
1683   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1684   // to threads in the team in chunks as the executing threads request them.
1685   // Each thread executes a chunk of iterations, then requests another chunk,
1686   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1687   // each chunk is proportional to the number of unassigned iterations divided
1688   // by the number of threads in the team, decreasing to 1. For a chunk_size
1689   // with value k (greater than 1), the size of each chunk is determined in the
1690   // same way, with the restriction that the chunks do not contain fewer than k
1691   // iterations (except for the last chunk to be assigned, which may have fewer
1692   // than k iterations).
1693   //
1694   // When schedule(auto) is specified, the decision regarding scheduling is
1695   // delegated to the compiler and/or runtime system. The programmer gives the
1696   // implementation the freedom to choose any possible mapping of iterations to
1697   // threads in the team.
1698   //
1699   // When schedule(runtime) is specified, the decision regarding scheduling is
1700   // deferred until run time, and the schedule and chunk size are taken from the
1701   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1702   // implementation defined
1703   //
1704   // while(__kmpc_dispatch_next(&LB, &UB)) {
1705   //   idx = LB;
1706   //   while (idx <= UB) { BODY; ++idx;
1707   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1708   //   } // inner loop
1709   // }
1710   //
1711   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1712   // When schedule(static, chunk_size) is specified, iterations are divided into
1713   // chunks of size chunk_size, and the chunks are assigned to the threads in
1714   // the team in a round-robin fashion in the order of the thread number.
1715   //
1716   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1717   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1718   //   LB = LB + ST;
1719   //   UB = UB + ST;
1720   // }
1721   //
1722 
1723   const Expr *IVExpr = S.getIterationVariable();
1724   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1725   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1726 
1727   if (DynamicOrOrdered) {
1728     llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
1729     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
1730                            IVSize, IVSigned, Ordered, UBVal, Chunk);
1731   } else {
1732     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
1733                          Ordered, IL, LB, UB, ST, Chunk);
1734   }
1735 
1736   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
1737                    ST, IL, Chunk);
1738 }
1739 
1740 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1741     OpenMPDistScheduleClauseKind ScheduleKind,
1742     const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
1743     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1744 
1745   auto &RT = CGM.getOpenMPRuntime();
1746 
1747   // Emit outer loop.
1748   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1749   // dynamic
1750   //
1751 
1752   const Expr *IVExpr = S.getIterationVariable();
1753   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1754   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1755 
1756   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
1757                               IVSize, IVSigned, /* Ordered = */ false,
1758                               IL, LB, UB, ST, Chunk);
1759 
1760   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
1761                    S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
1762 }
1763 
1764 /// \brief Emit a helper variable and return corresponding lvalue.
1765 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1766                                const DeclRefExpr *Helper) {
1767   auto VDecl = cast<VarDecl>(Helper->getDecl());
1768   CGF.EmitVarDecl(*VDecl);
1769   return CGF.EmitLValue(Helper);
1770 }
1771 
1772 namespace {
1773   struct ScheduleKindModifiersTy {
1774     OpenMPScheduleClauseKind Kind;
1775     OpenMPScheduleClauseModifier M1;
1776     OpenMPScheduleClauseModifier M2;
1777     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
1778                             OpenMPScheduleClauseModifier M1,
1779                             OpenMPScheduleClauseModifier M2)
1780         : Kind(Kind), M1(M1), M2(M2) {}
1781   };
1782 } // namespace
1783 
/// Emit the body of a worksharing loop directive \p S.
///
/// Emits the iteration variable and (when it is a variable) the iteration
/// count, guards everything else by the loop precondition, sets up the
/// clause-related private copies, and then emits either a single statically
/// scheduled inner loop (static non-chunked schedule without 'ordered') or an
/// outer dispatch loop via EmitOMPForOuterLoop. Reduction, lastprivate, simd
/// and linear finalization follow the loop.
///
/// \returns the result of EmitOMPLastprivateClauseInit for \p S, or false if
/// the precondition constant-folds to false and no loop is emitted.
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Assigned inside the 'then' scope below; the early return for a
  // constant-false precondition does not read it.
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the precondition folds to a constant, in which case no
    // continuation block is needed.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.
    // LB/UB/ST/IL are the lower bound, upper bound, stride and
    // is-last-iteration helper variables of the directive.
    LValue LB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    LValue UB =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      emitPrivateLoopCounters(*this, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(*this, S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleClauseKind ScheduleKind = OMPC_SCHEDULE_unknown;
      OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown;
      OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind = C->getScheduleKind();
        M1 = C->getFirstScheduleModifier();
        M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          // The chunk size is converted to the iteration variable's type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) &&
          !Ordered) {
        // Static non-chunked, non-ordered: a single inner loop over the
        // bounds computed by the runtime, with no outer dispatch loop.
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, Ordered,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Treated as monotonic when ordered, when the schedule kind is static
        // or unspecified, or when either modifier is 'monotonic'.
        const bool IsMonotonic = Ordered ||
                                 ScheduleKind == OMPC_SCHEDULE_static ||
                                 ScheduleKind == OMPC_SCHEDULE_unknown ||
                                 M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
                                 M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
      EmitOMPReductionClauseFinal(S);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    // The remaining finalization runs after LoopScope has been closed; both
    // steps are guarded by IsLastIter != 0 as well.
    if (isOpenMPSimdDirective(S.getDirectiveKind())) {
      EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
        return CGF.Builder.CreateIsNotNull(
            CGF.EmitLoadOfScalar(IL, S.getLocStart()));
      });
    }
    emitLinearClauseFinal(*this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}
1949 
1950 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
1951   bool HasLastprivates = false;
1952   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
1953                                           PrePostActionTy &) {
1954     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
1955   };
1956   {
1957     OMPLexicalScope Scope(*this, S);
1958     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
1959                                                 S.hasCancel());
1960   }
1961 
1962   // Emit an implicit barrier at the end.
1963   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
1964     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
1965   }
1966 }
1967 
1968 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
1969   bool HasLastprivates = false;
1970   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
1971                                           PrePostActionTy &) {
1972     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
1973   };
1974   {
1975     OMPLexicalScope Scope(*this, S);
1976     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1977   }
1978 
1979   // Emit an implicit barrier at the end.
1980   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
1981     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
1982   }
1983 }
1984 
1985 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
1986                                 const Twine &Name,
1987                                 llvm::Value *Init = nullptr) {
1988   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
1989   if (Init)
1990     CGF.EmitScalarInit(Init, LVal);
1991   return LVal;
1992 }
1993 
1994 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
1995   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
1996   auto *CS = dyn_cast<CompoundStmt>(Stmt);
1997   bool HasLastprivates = false;
1998   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
1999                                                     PrePostActionTy &) {
2000     auto &C = CGF.CGM.getContext();
2001     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2002     // Emit helper vars inits.
2003     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2004                                   CGF.Builder.getInt32(0));
2005     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2006                                       : CGF.Builder.getInt32(0);
2007     LValue UB =
2008         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2009     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2010                                   CGF.Builder.getInt32(1));
2011     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2012                                   CGF.Builder.getInt32(0));
2013     // Loop counter.
2014     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2015     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2016     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2017     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2018     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2019     // Generate condition for loop.
2020     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2021                         OK_Ordinary, S.getLocStart(),
2022                         /*fpContractable=*/false);
2023     // Increment for loop counter.
2024     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2025                       S.getLocStart());
2026     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2027       // Iterate through all sections and emit a switch construct:
2028       // switch (IV) {
2029       //   case 0:
2030       //     <SectionStmt[0]>;
2031       //     break;
2032       // ...
2033       //   case <NumSection> - 1:
2034       //     <SectionStmt[<NumSection> - 1]>;
2035       //     break;
2036       // }
2037       // .omp.sections.exit:
2038       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2039       auto *SwitchStmt = CGF.Builder.CreateSwitch(
2040           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2041           CS == nullptr ? 1 : CS->size());
2042       if (CS) {
2043         unsigned CaseNumber = 0;
2044         for (auto *SubStmt : CS->children()) {
2045           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2046           CGF.EmitBlock(CaseBB);
2047           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2048           CGF.EmitStmt(SubStmt);
2049           CGF.EmitBranch(ExitBB);
2050           ++CaseNumber;
2051         }
2052       } else {
2053         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2054         CGF.EmitBlock(CaseBB);
2055         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2056         CGF.EmitStmt(Stmt);
2057         CGF.EmitBranch(ExitBB);
2058       }
2059       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2060     };
2061 
2062     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2063     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2064       // Emit implicit barrier to synchronize threads and avoid data races on
2065       // initialization of firstprivate variables and post-update of lastprivate
2066       // variables.
2067       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2068           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2069           /*ForceSimpleCall=*/true);
2070     }
2071     CGF.EmitOMPPrivateClause(S, LoopScope);
2072     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2073     CGF.EmitOMPReductionClauseInit(S, LoopScope);
2074     (void)LoopScope.Privatize();
2075 
2076     // Emit static non-chunked loop.
2077     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2078         CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
2079         /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
2080         UB.getAddress(), ST.getAddress());
2081     // UB = min(UB, GlobalUB);
2082     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2083     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2084         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2085     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2086     // IV = LB;
2087     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2088     // while (idx <= UB) { BODY; ++idx; }
2089     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2090                          [](CodeGenFunction &) {});
2091     // Tell the runtime we are done.
2092     CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
2093     CGF.EmitOMPReductionClauseFinal(S);
2094     // Emit post-update of the reduction variables if IsLastIter != 0.
2095     emitPostUpdateForReductionClause(
2096         CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2097           return CGF.Builder.CreateIsNotNull(
2098               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2099         });
2100 
2101     // Emit final copy of the lastprivate variables if IsLastIter != 0.
2102     if (HasLastprivates)
2103       CGF.EmitOMPLastprivateClauseFinal(
2104           S, CGF.Builder.CreateIsNotNull(
2105                  CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2106   };
2107 
2108   bool HasCancel = false;
2109   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2110     HasCancel = OSD->hasCancel();
2111   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2112     HasCancel = OPSD->hasCancel();
2113   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2114                                               HasCancel);
2115   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2116   // clause. Otherwise the barrier will be generated by the codegen for the
2117   // directive.
2118   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2119     // Emit implicit barrier to synchronize threads and avoid data races on
2120     // initialization of firstprivate variables.
2121     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2122                                            OMPD_unknown);
2123   }
2124 }
2125 
2126 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2127   {
2128     OMPLexicalScope Scope(*this, S);
2129     EmitSections(S);
2130   }
2131   // Emit an implicit barrier at the end.
2132   if (!S.getSingleClause<OMPNowaitClause>()) {
2133     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2134                                            OMPD_sections);
2135   }
2136 }
2137 
2138 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2139   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2140     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2141   };
2142   OMPLexicalScope Scope(*this, S);
2143   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2144                                               S.hasCancel());
2145 }
2146 
2147 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2148   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2149   llvm::SmallVector<const Expr *, 8> DestExprs;
2150   llvm::SmallVector<const Expr *, 8> SrcExprs;
2151   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2152   // Check if there are any 'copyprivate' clauses associated with this
2153   // 'single' construct.
2154   // Build a list of copyprivate variables along with helper expressions
2155   // (<source>, <destination>, <destination>=<source> expressions)
2156   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2157     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2158     DestExprs.append(C->destination_exprs().begin(),
2159                      C->destination_exprs().end());
2160     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2161     AssignmentOps.append(C->assignment_ops().begin(),
2162                          C->assignment_ops().end());
2163   }
2164   // Emit code for 'single' region along with 'copyprivate' clauses
2165   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2166     Action.Enter(CGF);
2167     OMPPrivateScope SingleScope(CGF);
2168     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2169     CGF.EmitOMPPrivateClause(S, SingleScope);
2170     (void)SingleScope.Privatize();
2171     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2172   };
2173   {
2174     OMPLexicalScope Scope(*this, S);
2175     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2176                                             CopyprivateVars, DestExprs,
2177                                             SrcExprs, AssignmentOps);
2178   }
2179   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2180   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2181   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2182     CGM.getOpenMPRuntime().emitBarrierCall(
2183         *this, S.getLocStart(),
2184         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2185   }
2186 }
2187 
2188 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2189   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2190     Action.Enter(CGF);
2191     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2192   };
2193   OMPLexicalScope Scope(*this, S);
2194   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2195 }
2196 
2197 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2198   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2199     Action.Enter(CGF);
2200     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2201   };
2202   Expr *Hint = nullptr;
2203   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2204     Hint = HintClause->getHint();
2205   OMPLexicalScope Scope(*this, S);
2206   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2207                                             S.getDirectiveName().getAsString(),
2208                                             CodeGen, S.getLocStart(), Hint);
2209 }
2210 
2211 void CodeGenFunction::EmitOMPParallelForDirective(
2212     const OMPParallelForDirective &S) {
2213   // Emit directive as a combined directive that consists of two implicit
2214   // directives: 'parallel' with 'for' directive.
2215   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2216     CGF.EmitOMPWorksharingLoop(S);
2217   };
2218   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
2219 }
2220 
2221 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2222     const OMPParallelForSimdDirective &S) {
2223   // Emit directive as a combined directive that consists of two implicit
2224   // directives: 'parallel' with 'for' directive.
2225   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2226     CGF.EmitOMPWorksharingLoop(S);
2227   };
2228   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
2229 }
2230 
2231 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2232     const OMPParallelSectionsDirective &S) {
2233   // Emit directive as a combined directive that consists of two implicit
2234   // directives: 'parallel' with 'sections' directive.
2235   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2236     CGF.EmitSections(S);
2237   };
2238   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
2239 }
2240 
/// Emit code for a 'task' directive.
///
/// Collects the 'private', 'firstprivate' and 'depend' clause data, generates
/// the outlined task function for the captured statement, evaluates the
/// 'final' and 'if' clauses and finally asks the OpenMP runtime support to
/// emit the task call.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // Tracks canonical declarations that were already recorded, so a variable
  // listed more than once is only added the first time.
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  llvm::SmallVector<const Expr *, 8> PrivateVars;
  llvm::SmallVector<const Expr *, 8> PrivateCopies;
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    // IRef walks the clause's variable list in step with private_copies().
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        PrivateVars.push_back(*IRef);
        PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  // The set is reset here, so duplicates are only filtered within the
  // 'private' clauses and, separately, within the 'firstprivate' clauses.
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    // IRef and IElemInitRef advance in step with private_copies().
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        FirstprivateVars.push_back(*IRef);
        FirstprivateCopies.push_back(IInit);
        FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Build list of dependences as (dependency kind, expression) pairs.
  llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
      Dependences;
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    for (auto *IRef : C->varlists()) {
      Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
    }
  }
  // Body generator for the outlined task function.
  auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
      CodeGenFunction &CGF, PrePostActionTy &) {
    // Set proper addresses for generated private copies.
    auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
    {
      OMPPrivateScope Scope(CGF);
      if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
        // Parameters 3 and 2 of the captured declaration are loaded as the
        // copy function and the privates pointer respectively.
        auto *CopyFn = CGF.Builder.CreateLoad(
            CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
        auto *PrivatesPtr = CGF.Builder.CreateLoad(
            CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
        // Map privates.
        // One pointer-typed temporary is created per private and firstprivate
        // variable; its address is passed to the copy function after the
        // privates pointer.
        llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
        llvm::SmallVector<llvm::Value *, 16> CallArgs;
        CallArgs.push_back(PrivatesPtr);
        for (auto *E : PrivateVars) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          Address PrivatePtr =
              CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
          PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
          CallArgs.push_back(PrivatePtr.getPointer());
        }
        for (auto *E : FirstprivateVars) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          Address PrivatePtr =
              CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
          PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
          CallArgs.push_back(PrivatePtr.getPointer());
        }
        CGF.EmitRuntimeCall(CopyFn, CallArgs);
        // After the call, the pointer loaded from each temporary becomes the
        // address used for the corresponding variable inside the task body.
        for (auto &&Pair : PrivatePtrs) {
          Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                              CGF.getContext().getDeclAlign(Pair.first));
          Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
        }
      }
      (void)Scope.Privatize();
      // NOTE: this branch is intentionally empty for now; it is a placeholder
      // for the untied-task support mentioned in the TODO below.
      if (*PartId) {
        // TODO: emit code for untied tasks.
      }
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, OMPD_task, CodeGen);
  // Check if we should emit tied or untied task.
  bool Tied = !S.getSingleClause<OMPUntiedClause>();
  // Check if the task is final.
  // 'Final' carries either a compile-time constant (the int part) or a value
  // computed at run time (the pointer part).
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Use the first 'if' clause that has no name modifier or names 'task'.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitTaskCall(
      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
      CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
      FirstprivateCopies, FirstprivateInits, Dependences);
}
2368 
2369 void CodeGenFunction::EmitOMPTaskyieldDirective(
2370     const OMPTaskyieldDirective &S) {
2371   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2372 }
2373 
2374 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2375   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2376 }
2377 
2378 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2379   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2380 }
2381 
2382 void CodeGenFunction::EmitOMPTaskgroupDirective(
2383     const OMPTaskgroupDirective &S) {
2384   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2385     Action.Enter(CGF);
2386     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2387   };
2388   OMPLexicalScope Scope(*this, S);
2389   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2390 }
2391 
2392 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2393   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2394     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2395       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2396                                 FlushClause->varlist_end());
2397     }
2398     return llvm::None;
2399   }(), S.getLocStart());
2400 }
2401 
/// Emit the loop associated with a 'distribute' directive.
///
/// Emits the iteration variable and, if needed, the iteration count, guards
/// the loop with its precondition, and then emits either a static non-chunked
/// loop directly or the outer loop that handles the remaining schedules.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // ContBlock stays null when the precondition folds to a constant, in which
    // case no continuation block is needed.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      emitPrivateLoopCounters(*this, LoopScope, S.counters(),
                              S.private_counters());
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      // The chunk size, when given, is converted to the type of the iteration
      // variable.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // Static non-chunked schedule: one runtime init call, then a single
        // inner loop over the bounds it produced.
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, /* Ordered = */ false,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
2518 
2519 void CodeGenFunction::EmitOMPDistributeDirective(
2520     const OMPDistributeDirective &S) {
2521   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2522     CGF.EmitOMPDistributeLoop(S);
2523   };
2524   OMPLexicalScope Scope(*this, S);
2525   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2526                                               false);
2527 }
2528 
2529 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
2530                                                    const CapturedStmt *S) {
2531   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
2532   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
2533   CGF.CapturedStmtInfo = &CapStmtInfo;
2534   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
2535   Fn->addFnAttr(llvm::Attribute::NoInline);
2536   return Fn;
2537 }
2538 
2539 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
2540   if (!S.getAssociatedStmt())
2541     return;
2542   auto *C = S.getSingleClause<OMPSIMDClause>();
2543   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
2544                                  PrePostActionTy &Action) {
2545     if (C) {
2546       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2547       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2548       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
2549       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
2550       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
2551     } else {
2552       Action.Enter(CGF);
2553       CGF.EmitStmt(
2554           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2555     }
2556   };
2557   OMPLexicalScope Scope(*this, S);
2558   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
2559 }
2560 
2561 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
2562                                          QualType SrcType, QualType DestType,
2563                                          SourceLocation Loc) {
2564   assert(CGF.hasScalarEvaluationKind(DestType) &&
2565          "DestType must have scalar evaluation kind.");
2566   assert(!Val.isAggregate() && "Must be a scalar or complex.");
2567   return Val.isScalar()
2568              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
2569                                         Loc)
2570              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
2571                                                  DestType, Loc);
2572 }
2573 
2574 static CodeGenFunction::ComplexPairTy
2575 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
2576                       QualType DestType, SourceLocation Loc) {
2577   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
2578          "DestType must have complex evaluation kind.");
2579   CodeGenFunction::ComplexPairTy ComplexVal;
2580   if (Val.isScalar()) {
2581     // Convert the input element to the element type of the complex.
2582     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2583     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
2584                                               DestElementType, Loc);
2585     ComplexVal = CodeGenFunction::ComplexPairTy(
2586         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
2587   } else {
2588     assert(Val.isComplex() && "Must be a scalar or complex.");
2589     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
2590     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2591     ComplexVal.first = CGF.EmitScalarConversion(
2592         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
2593     ComplexVal.second = CGF.EmitScalarConversion(
2594         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
2595   }
2596   return ComplexVal;
2597 }
2598 
2599 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
2600                                   LValue LVal, RValue RVal) {
2601   if (LVal.isGlobalReg()) {
2602     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
2603   } else {
2604     CGF.EmitAtomicStore(RVal, LVal,
2605                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2606                                  : llvm::AtomicOrdering::Monotonic,
2607                         LVal.isVolatile(), /*IsInit=*/false);
2608   }
2609 }
2610 
2611 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
2612                                          QualType RValTy, SourceLocation Loc) {
2613   switch (getEvaluationKind(LVal.getType())) {
2614   case TEK_Scalar:
2615     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
2616                                *this, RVal, RValTy, LVal.getType(), Loc)),
2617                            LVal);
2618     break;
2619   case TEK_Complex:
2620     EmitStoreOfComplex(
2621         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
2622         /*isInit=*/false);
2623     break;
2624   case TEK_Aggregate:
2625     llvm_unreachable("Must be a scalar or complex.");
2626   }
2627 }
2628 
2629 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
2630                                   const Expr *X, const Expr *V,
2631                                   SourceLocation Loc) {
2632   // v = x;
2633   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
2634   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
2635   LValue XLValue = CGF.EmitLValue(X);
2636   LValue VLValue = CGF.EmitLValue(V);
2637   RValue Res = XLValue.isGlobalReg()
2638                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
2639                    : CGF.EmitAtomicLoad(
2640                          XLValue, Loc,
2641                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2642                                   : llvm::AtomicOrdering::Monotonic,
2643                          XLValue.isVolatile());
2644   // OpenMP, 2.12.6, atomic Construct
2645   // Any atomic construct with a seq_cst clause forces the atomically
2646   // performed operation to include an implicit flush operation without a
2647   // list.
2648   if (IsSeqCst)
2649     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2650   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
2651 }
2652 
/// Emit 'omp atomic write' (x = expr): evaluate \p E and store the result
/// into \p X via emitSimpleAtomicStore, followed by a flush when \p IsSeqCst
/// is set.
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  // NOTE: the lvalue of X and the value of E are both emitted as call
  // arguments, so their relative emission order is whatever order the host
  // compiler picks for argument evaluation.
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
2666 
2667 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
2668                                                 RValue Update,
2669                                                 BinaryOperatorKind BO,
2670                                                 llvm::AtomicOrdering AO,
2671                                                 bool IsXLHSInRHSPart) {
2672   auto &Context = CGF.CGM.getContext();
2673   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
2674   // expression is simple and atomic is allowed for the given type for the
2675   // target platform.
2676   if (BO == BO_Comma || !Update.isScalar() ||
2677       !Update.getScalarVal()->getType()->isIntegerTy() ||
2678       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
2679                         (Update.getScalarVal()->getType() !=
2680                          X.getAddress().getElementType())) ||
2681       !X.getAddress().getElementType()->isIntegerTy() ||
2682       !Context.getTargetInfo().hasBuiltinAtomic(
2683           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
2684     return std::make_pair(false, RValue::get(nullptr));
2685 
2686   llvm::AtomicRMWInst::BinOp RMWOp;
2687   switch (BO) {
2688   case BO_Add:
2689     RMWOp = llvm::AtomicRMWInst::Add;
2690     break;
2691   case BO_Sub:
2692     if (!IsXLHSInRHSPart)
2693       return std::make_pair(false, RValue::get(nullptr));
2694     RMWOp = llvm::AtomicRMWInst::Sub;
2695     break;
2696   case BO_And:
2697     RMWOp = llvm::AtomicRMWInst::And;
2698     break;
2699   case BO_Or:
2700     RMWOp = llvm::AtomicRMWInst::Or;
2701     break;
2702   case BO_Xor:
2703     RMWOp = llvm::AtomicRMWInst::Xor;
2704     break;
2705   case BO_LT:
2706     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2707                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
2708                                    : llvm::AtomicRMWInst::Max)
2709                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
2710                                    : llvm::AtomicRMWInst::UMax);
2711     break;
2712   case BO_GT:
2713     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2714                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
2715                                    : llvm::AtomicRMWInst::Min)
2716                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
2717                                    : llvm::AtomicRMWInst::UMin);
2718     break;
2719   case BO_Assign:
2720     RMWOp = llvm::AtomicRMWInst::Xchg;
2721     break;
2722   case BO_Mul:
2723   case BO_Div:
2724   case BO_Rem:
2725   case BO_Shl:
2726   case BO_Shr:
2727   case BO_LAnd:
2728   case BO_LOr:
2729     return std::make_pair(false, RValue::get(nullptr));
2730   case BO_PtrMemD:
2731   case BO_PtrMemI:
2732   case BO_LE:
2733   case BO_GE:
2734   case BO_EQ:
2735   case BO_NE:
2736   case BO_AddAssign:
2737   case BO_SubAssign:
2738   case BO_AndAssign:
2739   case BO_OrAssign:
2740   case BO_XorAssign:
2741   case BO_MulAssign:
2742   case BO_DivAssign:
2743   case BO_RemAssign:
2744   case BO_ShlAssign:
2745   case BO_ShrAssign:
2746   case BO_Comma:
2747     llvm_unreachable("Unsupported atomic update operation");
2748   }
2749   auto *UpdateVal = Update.getScalarVal();
2750   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
2751     UpdateVal = CGF.Builder.CreateIntCast(
2752         IC, X.getAddress().getElementType(),
2753         X.getType()->hasSignedIntegerRepresentation());
2754   }
2755   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
2756   return std::make_pair(true, RValue::get(Res));
2757 }
2758 
2759 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
2760     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
2761     llvm::AtomicOrdering AO, SourceLocation Loc,
2762     const llvm::function_ref<RValue(RValue)> &CommonGen) {
2763   // Update expressions are allowed to have the following forms:
2764   // x binop= expr; -> xrval + expr;
2765   // x++, ++x -> xrval + 1;
2766   // x--, --x -> xrval - 1;
2767   // x = x binop expr; -> xrval binop expr
2768   // x = expr Op x; - > expr binop xrval;
2769   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
2770   if (!Res.first) {
2771     if (X.isGlobalReg()) {
2772       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
2773       // 'xrval'.
2774       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
2775     } else {
2776       // Perform compare-and-swap procedure.
2777       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
2778     }
2779   }
2780   return Res;
2781 }
2782 
2783 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
2784                                     const Expr *X, const Expr *E,
2785                                     const Expr *UE, bool IsXLHSInRHSPart,
2786                                     SourceLocation Loc) {
2787   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2788          "Update expr in 'atomic update' must be a binary operator.");
2789   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2790   // Update expressions are allowed to have the following forms:
2791   // x binop= expr; -> xrval + expr;
2792   // x++, ++x -> xrval + 1;
2793   // x--, --x -> xrval - 1;
2794   // x = x binop expr; -> xrval binop expr
2795   // x = expr Op x; - > expr binop xrval;
2796   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
2797   LValue XLValue = CGF.EmitLValue(X);
2798   RValue ExprRValue = CGF.EmitAnyExpr(E);
2799   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2800                      : llvm::AtomicOrdering::Monotonic;
2801   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2802   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2803   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2804   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2805   auto Gen =
2806       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
2807         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2808         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2809         return CGF.EmitAnyExpr(UE);
2810       };
2811   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
2812       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2813   // OpenMP, 2.12.6, atomic Construct
2814   // Any atomic construct with a seq_cst clause forces the atomically
2815   // performed operation to include an implicit flush operation without a
2816   // list.
2817   if (IsSeqCst)
2818     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2819 }
2820 
2821 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
2822                             QualType SourceType, QualType ResType,
2823                             SourceLocation Loc) {
2824   switch (CGF.getEvaluationKind(ResType)) {
2825   case TEK_Scalar:
2826     return RValue::get(
2827         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
2828   case TEK_Complex: {
2829     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
2830     return RValue::getComplex(Res.first, Res.second);
2831   }
2832   case TEK_Aggregate:
2833     break;
2834   }
2835   llvm_unreachable("Must be a scalar or complex.");
2836 }
2837 
2838 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
2839                                      bool IsPostfixUpdate, const Expr *V,
2840                                      const Expr *X, const Expr *E,
2841                                      const Expr *UE, bool IsXLHSInRHSPart,
2842                                      SourceLocation Loc) {
2843   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
2844   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
2845   RValue NewVVal;
2846   LValue VLValue = CGF.EmitLValue(V);
2847   LValue XLValue = CGF.EmitLValue(X);
2848   RValue ExprRValue = CGF.EmitAnyExpr(E);
2849   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2850                      : llvm::AtomicOrdering::Monotonic;
2851   QualType NewVValType;
2852   if (UE) {
2853     // 'x' is updated with some additional value.
2854     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2855            "Update expr in 'atomic capture' must be a binary operator.");
2856     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2857     // Update expressions are allowed to have the following forms:
2858     // x binop= expr; -> xrval + expr;
2859     // x++, ++x -> xrval + 1;
2860     // x--, --x -> xrval - 1;
2861     // x = x binop expr; -> xrval binop expr
2862     // x = expr Op x; - > expr binop xrval;
2863     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2864     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2865     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2866     NewVValType = XRValExpr->getType();
2867     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2868     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
2869                   IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
2870       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2871       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2872       RValue Res = CGF.EmitAnyExpr(UE);
2873       NewVVal = IsPostfixUpdate ? XRValue : Res;
2874       return Res;
2875     };
2876     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
2877         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2878     if (Res.first) {
2879       // 'atomicrmw' instruction was generated.
2880       if (IsPostfixUpdate) {
2881         // Use old value from 'atomicrmw'.
2882         NewVVal = Res.second;
2883       } else {
2884         // 'atomicrmw' does not provide new value, so evaluate it using old
2885         // value of 'x'.
2886         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2887         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
2888         NewVVal = CGF.EmitAnyExpr(UE);
2889       }
2890     }
2891   } else {
2892     // 'x' is simply rewritten with some 'expr'.
2893     NewVValType = X->getType().getNonReferenceType();
2894     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
2895                                X->getType().getNonReferenceType(), Loc);
2896     auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
2897       NewVVal = XRValue;
2898       return ExprRValue;
2899     };
2900     // Try to perform atomicrmw xchg, otherwise simple exchange.
2901     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
2902         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
2903         Loc, Gen);
2904     if (Res.first) {
2905       // 'atomicrmw' instruction was generated.
2906       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
2907     }
2908   }
2909   // Emit post-update store to 'v' of old/new 'x' value.
2910   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
2911   // OpenMP, 2.12.6, atomic Construct
2912   // Any atomic construct with a seq_cst clause forces the atomically
2913   // performed operation to include an implicit flush operation without a
2914   // list.
2915   if (IsSeqCst)
2916     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2917 }
2918 
2919 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
2920                               bool IsSeqCst, bool IsPostfixUpdate,
2921                               const Expr *X, const Expr *V, const Expr *E,
2922                               const Expr *UE, bool IsXLHSInRHSPart,
2923                               SourceLocation Loc) {
2924   switch (Kind) {
2925   case OMPC_read:
2926     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
2927     break;
2928   case OMPC_write:
2929     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
2930     break;
2931   case OMPC_unknown:
2932   case OMPC_update:
2933     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
2934     break;
2935   case OMPC_capture:
2936     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
2937                              IsXLHSInRHSPart, Loc);
2938     break;
2939   case OMPC_if:
2940   case OMPC_final:
2941   case OMPC_num_threads:
2942   case OMPC_private:
2943   case OMPC_firstprivate:
2944   case OMPC_lastprivate:
2945   case OMPC_reduction:
2946   case OMPC_safelen:
2947   case OMPC_simdlen:
2948   case OMPC_collapse:
2949   case OMPC_default:
2950   case OMPC_seq_cst:
2951   case OMPC_shared:
2952   case OMPC_linear:
2953   case OMPC_aligned:
2954   case OMPC_copyin:
2955   case OMPC_copyprivate:
2956   case OMPC_flush:
2957   case OMPC_proc_bind:
2958   case OMPC_schedule:
2959   case OMPC_ordered:
2960   case OMPC_nowait:
2961   case OMPC_untied:
2962   case OMPC_threadprivate:
2963   case OMPC_depend:
2964   case OMPC_mergeable:
2965   case OMPC_device:
2966   case OMPC_threads:
2967   case OMPC_simd:
2968   case OMPC_map:
2969   case OMPC_num_teams:
2970   case OMPC_thread_limit:
2971   case OMPC_priority:
2972   case OMPC_grainsize:
2973   case OMPC_nogroup:
2974   case OMPC_num_tasks:
2975   case OMPC_hint:
2976   case OMPC_dist_schedule:
2977   case OMPC_defaultmap:
2978   case OMPC_uniform:
2979     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
2980   }
2981 }
2982 
2983 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
2984   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
2985   OpenMPClauseKind Kind = OMPC_unknown;
2986   for (auto *C : S.clauses()) {
2987     // Find first clause (skip seq_cst clause, if it is first).
2988     if (C->getClauseKind() != OMPC_seq_cst) {
2989       Kind = C->getClauseKind();
2990       break;
2991     }
2992   }
2993 
2994   const auto *CS =
2995       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
2996   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
2997     enterFullExpression(EWC);
2998   }
2999   // Processing for statements under 'atomic capture'.
3000   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3001     for (const auto *C : Compound->body()) {
3002       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3003         enterFullExpression(EWC);
3004       }
3005     }
3006   }
3007 
3008   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3009                                             PrePostActionTy &) {
3010     CGF.EmitStopPoint(CS);
3011     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3012                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3013                       S.isXLHSInRHSPart(), S.getLocStart());
3014   };
3015   OMPLexicalScope Scope(*this, S);
3016   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3017 }
3018 
3019 std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
3020 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
3021     CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
3022     bool IsOffloadEntry) {
3023   llvm::Function *OutlinedFn = nullptr;
3024   llvm::Constant *OutlinedFnID = nullptr;
3025   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3026     OMPPrivateScope PrivateScope(CGF);
3027     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3028     CGF.EmitOMPPrivateClause(S, PrivateScope);
3029     (void)PrivateScope.Privatize();
3030 
3031     Action.Enter(CGF);
3032     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3033   };
3034   // Emit target region as a standalone region.
3035   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3036       S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
3037   return std::make_pair(OutlinedFn, OutlinedFnID);
3038 }
3039 
3040 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3041   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3042 
3043   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3044   GenerateOpenMPCapturedVars(CS, CapturedVars);
3045 
3046   llvm::Function *Fn = nullptr;
3047   llvm::Constant *FnID = nullptr;
3048 
3049   // Check if we have any if clause associated with the directive.
3050   const Expr *IfCond = nullptr;
3051 
3052   if (auto *C = S.getSingleClause<OMPIfClause>()) {
3053     IfCond = C->getCondition();
3054   }
3055 
3056   // Check if we have any device clause associated with the directive.
3057   const Expr *Device = nullptr;
3058   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3059     Device = C->getDevice();
3060   }
3061 
3062   // Check if we have an if clause whose conditional always evaluates to false
3063   // or if we do not have any targets specified. If so the target region is not
3064   // an offload entry point.
3065   bool IsOffloadEntry = true;
3066   if (IfCond) {
3067     bool Val;
3068     if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3069       IsOffloadEntry = false;
3070   }
3071   if (CGM.getLangOpts().OMPTargetTriples.empty())
3072     IsOffloadEntry = false;
3073 
3074   assert(CurFuncDecl && "No parent declaration for target region!");
3075   StringRef ParentName;
3076   // In case we have Ctors/Dtors we use the complete type variant to produce
3077   // the mangling of the device outlined kernel.
3078   if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
3079     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3080   else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
3081     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3082   else
3083     ParentName =
3084         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
3085 
3086   std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
3087       CGM, S, ParentName, IsOffloadEntry);
3088   OMPLexicalScope Scope(*this, S);
3089   CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
3090                                         CapturedVars);
3091 }
3092 
3093 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3094                                         const OMPExecutableDirective &S,
3095                                         OpenMPDirectiveKind InnermostKind,
3096                                         const RegionCodeGenTy &CodeGen) {
3097   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3098   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
3099       emitParallelOrTeamsOutlinedFunction(S,
3100           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3101 
3102   const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
3103   const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
3104   const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
3105   if (NT || TL) {
3106     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3107     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3108 
3109     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3110                                                   S.getLocStart());
3111   }
3112 
3113   OMPLexicalScope Scope(CGF, S);
3114   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3115   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3116   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3117                                            CapturedVars);
3118 }
3119 
3120 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3121   // Emit parallel region as a standalone region.
3122   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3123     OMPPrivateScope PrivateScope(CGF);
3124     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3125     CGF.EmitOMPPrivateClause(S, PrivateScope);
3126     (void)PrivateScope.Privatize();
3127     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3128   };
3129   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3130 }
3131 
3132 void CodeGenFunction::EmitOMPCancellationPointDirective(
3133     const OMPCancellationPointDirective &S) {
3134   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3135                                                    S.getCancelRegion());
3136 }
3137 
3138 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3139   const Expr *IfCond = nullptr;
3140   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3141     if (C->getNameModifier() == OMPD_unknown ||
3142         C->getNameModifier() == OMPD_cancel) {
3143       IfCond = C->getCondition();
3144       break;
3145     }
3146   }
3147   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3148                                         S.getCancelRegion());
3149 }
3150 
3151 CodeGenFunction::JumpDest
3152 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3153   if (Kind == OMPD_parallel || Kind == OMPD_task)
3154     return ReturnBlock;
3155   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3156          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
3157   return BreakContinueStack.back().BreakBlock;
3158 }
3159 
3160 // Generate the instructions for '#pragma omp target data' directive.
3161 void CodeGenFunction::EmitOMPTargetDataDirective(
3162     const OMPTargetDataDirective &S) {
3163   // emit the code inside the construct for now
3164   OMPLexicalScope Scope(*this, S);
3165   CGM.getOpenMPRuntime().emitInlinedDirective(
3166       *this, OMPD_target_data, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3167         CGF.EmitStmt(
3168             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3169       });
3170 }
3171 
/// Placeholder for the codegen of the 'target enter data' directive.
///
/// Not implemented yet: the body is empty, so \p S is ignored and nothing is
/// emitted for the directive.
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // TODO: codegen for target enter data.
}
3176 
/// Placeholder for the codegen of the 'target exit data' directive.
///
/// Not implemented yet: the body is empty, so \p S is ignored and nothing is
/// emitted for the directive.
void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // TODO: codegen for target exit data.
}
3181 
/// Placeholder for the codegen of the 'target parallel' directive.
///
/// Not implemented yet: the body is empty, so \p S is ignored and nothing is
/// emitted for the directive, including its associated statement.
void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  // TODO: codegen for target parallel.
}
3186 
/// Placeholder for the codegen of the 'target parallel for' directive.
///
/// Not implemented yet: the body is empty, so \p S is ignored and nothing is
/// emitted for the directive, including its associated statement.
void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}
3191 
3192 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
3193   // emit the code inside the construct for now
3194   OMPLexicalScope Scope(*this, S);
3195   CGM.getOpenMPRuntime().emitInlinedDirective(
3196       *this, OMPD_taskloop, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3197         OMPLoopScope PreInitScope(CGF, S);
3198         CGF.EmitStmt(
3199             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3200       });
3201 }
3202 
3203 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
3204     const OMPTaskLoopSimdDirective &S) {
3205   // emit the code inside the construct for now
3206   OMPLexicalScope Scope(*this, S);
3207   CGM.getOpenMPRuntime().emitInlinedDirective(
3208       *this, OMPD_taskloop_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3209         OMPLoopScope PreInitScope(CGF, S);
3210         CGF.EmitStmt(
3211             cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3212       });
3213 }
3214