1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
25 
26 namespace {
27 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47   CodeGenFunction::OMPPrivateScope InlinedShareds;
48 
49   static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50     return CGF.LambdaCaptureFields.lookup(VD) ||
51            (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52            (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
53   }
54 
55 public:
56   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57                   bool AsInlined = false)
58       : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
59         InlinedShareds(CGF) {
60     emitPreInitStmt(CGF, S);
61     if (AsInlined) {
62       if (S.hasAssociatedStmt()) {
63         auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
64         for (auto &C : CS->captures()) {
65           if (C.capturesVariable() || C.capturesVariableByCopy()) {
66             auto *VD = C.getCapturedVar();
67             DeclRefExpr DRE(const_cast<VarDecl *>(VD),
68                             isCapturedVar(CGF, VD) ||
69                                 (CGF.CapturedStmtInfo &&
70                                  InlinedShareds.isGlobalVarCaptured(VD)),
71                             VD->getType().getNonReferenceType(), VK_LValue,
72                             SourceLocation());
73             InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
74               return CGF.EmitLValue(&DRE).getAddress();
75             });
76           }
77         }
78         (void)InlinedShareds.Privatize();
79       }
80     }
81   }
82 };
83 
84 /// Private scope for OpenMP loop-based directives, that supports capturing
85 /// of used expression from loop statement.
86 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
87   void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
88     if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
89       if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
90         for (const auto *I : PreInits->decls())
91           CGF.EmitVarDecl(cast<VarDecl>(*I));
92       }
93     }
94   }
95 
96 public:
97   OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
98       : CodeGenFunction::RunCleanupsScope(CGF) {
99     emitPreInitStmt(CGF, S);
100   }
101 };
102 
103 } // namespace
104 
105 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
106   auto &C = getContext();
107   llvm::Value *Size = nullptr;
108   auto SizeInChars = C.getTypeSizeInChars(Ty);
109   if (SizeInChars.isZero()) {
110     // getTypeSizeInChars() returns 0 for a VLA.
111     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
112       llvm::Value *ArraySize;
113       std::tie(ArraySize, Ty) = getVLASize(VAT);
114       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
115     }
116     SizeInChars = C.getTypeSizeInChars(Ty);
117     if (SizeInChars.isZero())
118       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
119     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
120   } else
121     Size = CGM.getSize(SizeInChars);
122   return Size;
123 }
124 
125 void CodeGenFunction::GenerateOpenMPCapturedVars(
126     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
127   const RecordDecl *RD = S.getCapturedRecordDecl();
128   auto CurField = RD->field_begin();
129   auto CurCap = S.captures().begin();
130   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
131                                                  E = S.capture_init_end();
132        I != E; ++I, ++CurField, ++CurCap) {
133     if (CurField->hasCapturedVLAType()) {
134       auto VAT = CurField->getCapturedVLAType();
135       auto *Val = VLASizeMap[VAT->getSizeExpr()];
136       CapturedVars.push_back(Val);
137     } else if (CurCap->capturesThis())
138       CapturedVars.push_back(CXXThisValue);
139     else if (CurCap->capturesVariableByCopy())
140       CapturedVars.push_back(
141           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
142     else {
143       assert(CurCap->capturesVariable() && "Expected capture by reference.");
144       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
145     }
146   }
147 }
148 
149 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
150                                     StringRef Name, LValue AddrLV,
151                                     bool isReferenceType = false) {
152   ASTContext &Ctx = CGF.getContext();
153 
154   auto *CastedPtr = CGF.EmitScalarConversion(
155       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
156       Ctx.getPointerType(DstType), SourceLocation());
157   auto TmpAddr =
158       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
159           .getAddress();
160 
161   // If we are dealing with references we need to return the address of the
162   // reference instead of the reference of the value.
163   if (isReferenceType) {
164     QualType RefType = Ctx.getLValueReferenceType(DstType);
165     auto *RefVal = TmpAddr.getPointer();
166     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
167     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
168     CGF.EmitScalarInit(RefVal, TmpLVal);
169   }
170 
171   return TmpAddr;
172 }
173 
174 llvm::Function *
175 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
176                                                     bool CastValToPtr) {
177   assert(
178       CapturedStmtInfo &&
179       "CapturedStmtInfo should be set when generating the captured function");
180   const CapturedDecl *CD = S.getCapturedDecl();
181   const RecordDecl *RD = S.getCapturedRecordDecl();
182   assert(CD->hasBody() && "missing CapturedDecl body");
183 
184   // Build the argument list.
185   ASTContext &Ctx = CGM.getContext();
186   FunctionArgList Args;
187   Args.append(CD->param_begin(),
188               std::next(CD->param_begin(), CD->getContextParamPosition()));
189   auto I = S.captures().begin();
190   for (auto *FD : RD->fields()) {
191     QualType ArgType = FD->getType();
192     IdentifierInfo *II = nullptr;
193     VarDecl *CapVar = nullptr;
194 
195     // If this is a capture by copy and the type is not a pointer, the outlined
196     // function argument type should be uintptr and the value properly casted to
197     // uintptr. This is necessary given that the runtime library is only able to
198     // deal with pointers. We can pass in the same way the VLA type sizes to the
199     // outlined function.
200     if (CastValToPtr) {
201       if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
202           I->capturesVariableArrayType())
203         ArgType = Ctx.getUIntPtrType();
204     }
205 
206     if (I->capturesVariable() || I->capturesVariableByCopy()) {
207       CapVar = I->getCapturedVar();
208       II = CapVar->getIdentifier();
209     } else if (I->capturesThis())
210       II = &getContext().Idents.get("this");
211     else {
212       assert(I->capturesVariableArrayType());
213       II = &getContext().Idents.get("vla");
214     }
215     if (ArgType->isVariablyModifiedType())
216       ArgType = getContext().getVariableArrayDecayedType(ArgType);
217     Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
218                                              FD->getLocation(), II, ArgType));
219     ++I;
220   }
221   Args.append(
222       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
223       CD->param_end());
224 
225   // Create the function declaration.
226   FunctionType::ExtInfo ExtInfo;
227   const CGFunctionInfo &FuncInfo =
228       CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
229   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
230 
231   llvm::Function *F = llvm::Function::Create(
232       FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
233       CapturedStmtInfo->getHelperName(), &CGM.getModule());
234   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
235   if (CD->isNothrow())
236     F->addFnAttr(llvm::Attribute::NoUnwind);
237 
238   // Generate the function.
239   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
240                 CD->getBody()->getLocStart());
241   unsigned Cnt = CD->getContextParamPosition();
242   I = S.captures().begin();
243   for (auto *FD : RD->fields()) {
244     // If we are capturing a pointer by copy we don't need to do anything, just
245     // use the value that we get from the arguments.
246     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
247       setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
248       ++Cnt;
249       ++I;
250       continue;
251     }
252 
253     LValue ArgLVal =
254         MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
255                        AlignmentSource::Decl);
256     if (FD->hasCapturedVLAType()) {
257       LValue CastedArgLVal =
258           CastValToPtr
259               ? MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
260                                                     Args[Cnt]->getName(),
261                                                     ArgLVal),
262                                FD->getType(), AlignmentSource::Decl)
263               : ArgLVal;
264       auto *ExprArg =
265           EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
266       auto VAT = FD->getCapturedVLAType();
267       VLASizeMap[VAT->getSizeExpr()] = ExprArg;
268     } else if (I->capturesVariable()) {
269       auto *Var = I->getCapturedVar();
270       QualType VarTy = Var->getType();
271       Address ArgAddr = ArgLVal.getAddress();
272       if (!VarTy->isReferenceType()) {
273         ArgAddr = EmitLoadOfReference(
274             ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
275       }
276       setAddrOfLocalVar(
277           Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
278     } else if (I->capturesVariableByCopy()) {
279       assert(!FD->getType()->isAnyPointerType() &&
280              "Not expecting a captured pointer.");
281       auto *Var = I->getCapturedVar();
282       QualType VarTy = Var->getType();
283       if (!CastValToPtr && VarTy->isReferenceType()) {
284         Address Temp = CreateMemTemp(VarTy);
285         Builder.CreateStore(ArgLVal.getPointer(), Temp);
286         ArgLVal = MakeAddrLValue(Temp, VarTy);
287       }
288       setAddrOfLocalVar(Var, CastValToPtr ? castValueFromUintptr(
289                                                 *this, FD->getType(),
290                                                 Args[Cnt]->getName(), ArgLVal,
291                                                 VarTy->isReferenceType())
292                                           : ArgLVal.getAddress());
293     } else {
294       // If 'this' is captured, load it into CXXThisValue.
295       assert(I->capturesThis());
296       CXXThisValue =
297           EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
298     }
299     ++Cnt;
300     ++I;
301   }
302 
303   PGO.assignRegionCounters(GlobalDecl(CD), F);
304   CapturedStmtInfo->EmitBody(*this, CD->getBody());
305   FinishFunction(CD->getBodyRBrace());
306 
307   return F;
308 }
309 
310 //===----------------------------------------------------------------------===//
311 //                              OpenMP Directive Emission
312 //===----------------------------------------------------------------------===//
313 void CodeGenFunction::EmitOMPAggregateAssign(
314     Address DestAddr, Address SrcAddr, QualType OriginalType,
315     const llvm::function_ref<void(Address, Address)> &CopyGen) {
316   // Perform element-by-element initialization.
317   QualType ElementTy;
318 
319   // Drill down to the base element type on both arrays.
320   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
321   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
322   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
323 
324   auto SrcBegin = SrcAddr.getPointer();
325   auto DestBegin = DestAddr.getPointer();
326   // Cast from pointer to array type to pointer to single element.
327   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
328   // The basic structure here is a while-do loop.
329   auto BodyBB = createBasicBlock("omp.arraycpy.body");
330   auto DoneBB = createBasicBlock("omp.arraycpy.done");
331   auto IsEmpty =
332       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
333   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
334 
335   // Enter the loop body, making that address the current address.
336   auto EntryBB = Builder.GetInsertBlock();
337   EmitBlock(BodyBB);
338 
339   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
340 
341   llvm::PHINode *SrcElementPHI =
342     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
343   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
344   Address SrcElementCurrent =
345       Address(SrcElementPHI,
346               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
347 
348   llvm::PHINode *DestElementPHI =
349     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
350   DestElementPHI->addIncoming(DestBegin, EntryBB);
351   Address DestElementCurrent =
352     Address(DestElementPHI,
353             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
354 
355   // Emit copy.
356   CopyGen(DestElementCurrent, SrcElementCurrent);
357 
358   // Shift the address forward by one element.
359   auto DestElementNext = Builder.CreateConstGEP1_32(
360       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
361   auto SrcElementNext = Builder.CreateConstGEP1_32(
362       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
363   // Check whether we've reached the end.
364   auto Done =
365       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
366   Builder.CreateCondBr(Done, DoneBB, BodyBB);
367   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
368   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
369 
370   // Done.
371   EmitBlock(DoneBB, /*IsFinished=*/true);
372 }
373 
374 /// Check if the combiner is a call to UDR combiner and if it is so return the
375 /// UDR decl used for reduction.
376 static const OMPDeclareReductionDecl *
377 getReductionInit(const Expr *ReductionOp) {
378   if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
379     if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
380       if (auto *DRE =
381               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
382         if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
383           return DRD;
384   return nullptr;
385 }
386 
387 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
388                                              const OMPDeclareReductionDecl *DRD,
389                                              const Expr *InitOp,
390                                              Address Private, Address Original,
391                                              QualType Ty) {
392   if (DRD->getInitializer()) {
393     std::pair<llvm::Function *, llvm::Function *> Reduction =
394         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
395     auto *CE = cast<CallExpr>(InitOp);
396     auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
397     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
398     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
399     auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
400     auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
401     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
402     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
403                             [=]() -> Address { return Private; });
404     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
405                             [=]() -> Address { return Original; });
406     (void)PrivateScope.Privatize();
407     RValue Func = RValue::get(Reduction.second);
408     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
409     CGF.EmitIgnoredExpr(InitOp);
410   } else {
411     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
412     auto *GV = new llvm::GlobalVariable(
413         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
414         llvm::GlobalValue::PrivateLinkage, Init, ".init");
415     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
416     RValue InitRVal;
417     switch (CGF.getEvaluationKind(Ty)) {
418     case TEK_Scalar:
419       InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
420       break;
421     case TEK_Complex:
422       InitRVal =
423           RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
424       break;
425     case TEK_Aggregate:
426       InitRVal = RValue::getAggregate(LV.getAddress());
427       break;
428     }
429     OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
430     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
431     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
432                          /*IsInitializer=*/false);
433   }
434 }
435 
436 /// \brief Emit initialization of arrays of complex types.
437 /// \param DestAddr Address of the array.
438 /// \param Type Type of array.
439 /// \param Init Initial expression of array.
440 /// \param SrcAddr Address of the original array.
441 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
442                                  QualType Type, const Expr *Init,
443                                  Address SrcAddr = Address::invalid()) {
444   auto *DRD = getReductionInit(Init);
445   // Perform element-by-element initialization.
446   QualType ElementTy;
447 
448   // Drill down to the base element type on both arrays.
449   auto ArrayTy = Type->getAsArrayTypeUnsafe();
450   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
451   DestAddr =
452       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
453   if (DRD)
454     SrcAddr =
455         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
456 
457   llvm::Value *SrcBegin = nullptr;
458   if (DRD)
459     SrcBegin = SrcAddr.getPointer();
460   auto DestBegin = DestAddr.getPointer();
461   // Cast from pointer to array type to pointer to single element.
462   auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
463   // The basic structure here is a while-do loop.
464   auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
465   auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
466   auto IsEmpty =
467       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
468   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
469 
470   // Enter the loop body, making that address the current address.
471   auto EntryBB = CGF.Builder.GetInsertBlock();
472   CGF.EmitBlock(BodyBB);
473 
474   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
475 
476   llvm::PHINode *SrcElementPHI = nullptr;
477   Address SrcElementCurrent = Address::invalid();
478   if (DRD) {
479     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
480                                           "omp.arraycpy.srcElementPast");
481     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
482     SrcElementCurrent =
483         Address(SrcElementPHI,
484                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
485   }
486   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
487       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
488   DestElementPHI->addIncoming(DestBegin, EntryBB);
489   Address DestElementCurrent =
490       Address(DestElementPHI,
491               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
492 
493   // Emit copy.
494   {
495     CodeGenFunction::RunCleanupsScope InitScope(CGF);
496     if (DRD && (DRD->getInitializer() || !Init)) {
497       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
498                                        SrcElementCurrent, ElementTy);
499     } else
500       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
501                            /*IsInitializer=*/false);
502   }
503 
504   if (DRD) {
505     // Shift the address forward by one element.
506     auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
507         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
508     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
509   }
510 
511   // Shift the address forward by one element.
512   auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
513       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
514   // Check whether we've reached the end.
515   auto Done =
516       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
517   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
518   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
519 
520   // Done.
521   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
522 }
523 
524 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
525                                   Address SrcAddr, const VarDecl *DestVD,
526                                   const VarDecl *SrcVD, const Expr *Copy) {
527   if (OriginalType->isArrayType()) {
528     auto *BO = dyn_cast<BinaryOperator>(Copy);
529     if (BO && BO->getOpcode() == BO_Assign) {
530       // Perform simple memcpy for simple copying.
531       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
532     } else {
533       // For arrays with complex element types perform element by element
534       // copying.
535       EmitOMPAggregateAssign(
536           DestAddr, SrcAddr, OriginalType,
537           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
538             // Working with the single array element, so have to remap
539             // destination and source variables to corresponding array
540             // elements.
541             CodeGenFunction::OMPPrivateScope Remap(*this);
542             Remap.addPrivate(DestVD, [DestElement]() -> Address {
543               return DestElement;
544             });
545             Remap.addPrivate(
546                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
547             (void)Remap.Privatize();
548             EmitIgnoredExpr(Copy);
549           });
550     }
551   } else {
552     // Remap pseudo source variable to private copy.
553     CodeGenFunction::OMPPrivateScope Remap(*this);
554     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
555     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
556     (void)Remap.Privatize();
557     // Emit copying of the whole variable.
558     EmitIgnoredExpr(Copy);
559   }
560 }
561 
562 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
563                                                 OMPPrivateScope &PrivateScope) {
564   if (!HaveInsertPoint())
565     return false;
566   bool FirstprivateIsLastprivate = false;
567   llvm::DenseSet<const VarDecl *> Lastprivates;
568   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
569     for (const auto *D : C->varlists())
570       Lastprivates.insert(
571           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
572   }
573   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
574   CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
575   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
576     auto IRef = C->varlist_begin();
577     auto InitsRef = C->inits().begin();
578     for (auto IInit : C->private_copies()) {
579       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
580       bool ThisFirstprivateIsLastprivate =
581           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
582       auto *CapFD = CapturesInfo.lookup(OrigVD);
583       auto *FD = CapturedStmtInfo->lookup(OrigVD);
584       if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
585           !FD->getType()->isReferenceType()) {
586         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
587         ++IRef;
588         ++InitsRef;
589         continue;
590       }
591       FirstprivateIsLastprivate =
592           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
593       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
594         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
595         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
596         bool IsRegistered;
597         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
598                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
599                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
600         Address OriginalAddr = EmitLValue(&DRE).getAddress();
601         QualType Type = VD->getType();
602         if (Type->isArrayType()) {
603           // Emit VarDecl with copy init for arrays.
604           // Get the address of the original variable captured in current
605           // captured region.
606           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
607             auto Emission = EmitAutoVarAlloca(*VD);
608             auto *Init = VD->getInit();
609             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
610               // Perform simple memcpy.
611               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
612                                   Type);
613             } else {
614               EmitOMPAggregateAssign(
615                   Emission.getAllocatedAddress(), OriginalAddr, Type,
616                   [this, VDInit, Init](Address DestElement,
617                                        Address SrcElement) {
618                     // Clean up any temporaries needed by the initialization.
619                     RunCleanupsScope InitScope(*this);
620                     // Emit initialization for single element.
621                     setAddrOfLocalVar(VDInit, SrcElement);
622                     EmitAnyExprToMem(Init, DestElement,
623                                      Init->getType().getQualifiers(),
624                                      /*IsInitializer*/ false);
625                     LocalDeclMap.erase(VDInit);
626                   });
627             }
628             EmitAutoVarCleanups(Emission);
629             return Emission.getAllocatedAddress();
630           });
631         } else {
632           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
633             // Emit private VarDecl with copy init.
634             // Remap temp VDInit variable to the address of the original
635             // variable
636             // (for proper handling of captured global variables).
637             setAddrOfLocalVar(VDInit, OriginalAddr);
638             EmitDecl(*VD);
639             LocalDeclMap.erase(VDInit);
640             return GetAddrOfLocalVar(VD);
641           });
642         }
643         assert(IsRegistered &&
644                "firstprivate var already registered as private");
645         // Silence the warning about unused variable.
646         (void)IsRegistered;
647       }
648       ++IRef;
649       ++InitsRef;
650     }
651   }
652   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
653 }
654 
655 void CodeGenFunction::EmitOMPPrivateClause(
656     const OMPExecutableDirective &D,
657     CodeGenFunction::OMPPrivateScope &PrivateScope) {
658   if (!HaveInsertPoint())
659     return;
660   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
661   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
662     auto IRef = C->varlist_begin();
663     for (auto IInit : C->private_copies()) {
664       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
665       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
666         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
667         bool IsRegistered =
668             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
669               // Emit private VarDecl with copy init.
670               EmitDecl(*VD);
671               return GetAddrOfLocalVar(VD);
672             });
673         assert(IsRegistered && "private var already registered as private");
674         // Silence the warning about unused variable.
675         (void)IsRegistered;
676       }
677       ++IRef;
678     }
679   }
680 }
681 
682 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
683   if (!HaveInsertPoint())
684     return false;
685   // threadprivate_var1 = master_threadprivate_var1;
686   // operator=(threadprivate_var2, master_threadprivate_var2);
687   // ...
688   // __kmpc_barrier(&loc, global_tid);
689   llvm::DenseSet<const VarDecl *> CopiedVars;
690   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
691   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
692     auto IRef = C->varlist_begin();
693     auto ISrcRef = C->source_exprs().begin();
694     auto IDestRef = C->destination_exprs().begin();
695     for (auto *AssignOp : C->assignment_ops()) {
696       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
697       QualType Type = VD->getType();
698       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
699         // Get the address of the master variable. If we are emitting code with
700         // TLS support, the address is passed from the master as field in the
701         // captured declaration.
702         Address MasterAddr = Address::invalid();
703         if (getLangOpts().OpenMPUseTLS &&
704             getContext().getTargetInfo().isTLSSupported()) {
705           assert(CapturedStmtInfo->lookup(VD) &&
706                  "Copyin threadprivates should have been captured!");
707           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
708                           VK_LValue, (*IRef)->getExprLoc());
709           MasterAddr = EmitLValue(&DRE).getAddress();
710           LocalDeclMap.erase(VD);
711         } else {
712           MasterAddr =
713             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
714                                         : CGM.GetAddrOfGlobal(VD),
715                     getContext().getDeclAlign(VD));
716         }
717         // Get the address of the threadprivate variable.
718         Address PrivateAddr = EmitLValue(*IRef).getAddress();
719         if (CopiedVars.size() == 1) {
720           // At first check if current thread is a master thread. If it is, no
721           // need to copy data.
722           CopyBegin = createBasicBlock("copyin.not.master");
723           CopyEnd = createBasicBlock("copyin.not.master.end");
724           Builder.CreateCondBr(
725               Builder.CreateICmpNE(
726                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
727                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
728               CopyBegin, CopyEnd);
729           EmitBlock(CopyBegin);
730         }
731         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
732         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
733         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
734       }
735       ++IRef;
736       ++ISrcRef;
737       ++IDestRef;
738     }
739   }
740   if (CopyEnd) {
741     // Exit out of copying procedure for non-master thread.
742     EmitBlock(CopyEnd, /*IsFinished=*/true);
743     return true;
744   }
745   return false;
746 }
747 
748 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
749     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
750   if (!HaveInsertPoint())
751     return false;
752   bool HasAtLeastOneLastprivate = false;
753   llvm::DenseSet<const VarDecl *> SIMDLCVs;
754   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
755     auto *LoopDirective = cast<OMPLoopDirective>(&D);
756     for (auto *C : LoopDirective->counters()) {
757       SIMDLCVs.insert(
758           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
759     }
760   }
761   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
762   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
763     HasAtLeastOneLastprivate = true;
764     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
765       break;
766     auto IRef = C->varlist_begin();
767     auto IDestRef = C->destination_exprs().begin();
768     for (auto *IInit : C->private_copies()) {
769       // Keep the address of the original variable for future update at the end
770       // of the loop.
771       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
772       // Taskloops do not require additional initialization, it is done in
773       // runtime support library.
774       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
775         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
776         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
777           DeclRefExpr DRE(
778               const_cast<VarDecl *>(OrigVD),
779               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
780                   OrigVD) != nullptr,
781               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
782           return EmitLValue(&DRE).getAddress();
783         });
784         // Check if the variable is also a firstprivate: in this case IInit is
785         // not generated. Initialization of this variable will happen in codegen
786         // for 'firstprivate' clause.
787         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
788           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
789           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
790             // Emit private VarDecl with copy init.
791             EmitDecl(*VD);
792             return GetAddrOfLocalVar(VD);
793           });
794           assert(IsRegistered &&
795                  "lastprivate var already registered as private");
796           (void)IsRegistered;
797         }
798       }
799       ++IRef;
800       ++IDestRef;
801     }
802   }
803   return HasAtLeastOneLastprivate;
804 }
805 
806 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
807     const OMPExecutableDirective &D, bool NoFinals,
808     llvm::Value *IsLastIterCond) {
809   if (!HaveInsertPoint())
810     return;
811   // Emit following code:
812   // if (<IsLastIterCond>) {
813   //   orig_var1 = private_orig_var1;
814   //   ...
815   //   orig_varn = private_orig_varn;
816   // }
817   llvm::BasicBlock *ThenBB = nullptr;
818   llvm::BasicBlock *DoneBB = nullptr;
819   if (IsLastIterCond) {
820     ThenBB = createBasicBlock(".omp.lastprivate.then");
821     DoneBB = createBasicBlock(".omp.lastprivate.done");
822     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
823     EmitBlock(ThenBB);
824   }
825   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
826   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
827   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
828     auto IC = LoopDirective->counters().begin();
829     for (auto F : LoopDirective->finals()) {
830       auto *D =
831           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
832       if (NoFinals)
833         AlreadyEmittedVars.insert(D);
834       else
835         LoopCountersAndUpdates[D] = F;
836       ++IC;
837     }
838   }
839   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
840     auto IRef = C->varlist_begin();
841     auto ISrcRef = C->source_exprs().begin();
842     auto IDestRef = C->destination_exprs().begin();
843     for (auto *AssignOp : C->assignment_ops()) {
844       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
845       QualType Type = PrivateVD->getType();
846       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
847       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
848         // If lastprivate variable is a loop control variable for loop-based
849         // directive, update its value before copyin back to original
850         // variable.
851         if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
852           EmitIgnoredExpr(FinalExpr);
853         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
854         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
855         // Get the address of the original variable.
856         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
857         // Get the address of the private variable.
858         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
859         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
860           PrivateAddr =
861               Address(Builder.CreateLoad(PrivateAddr),
862                       getNaturalTypeAlignment(RefTy->getPointeeType()));
863         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
864       }
865       ++IRef;
866       ++ISrcRef;
867       ++IDestRef;
868     }
869     if (auto *PostUpdate = C->getPostUpdateExpr())
870       EmitIgnoredExpr(PostUpdate);
871   }
872   if (IsLastIterCond)
873     EmitBlock(DoneBB, /*IsFinished=*/true);
874 }
875 
876 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
877                           LValue BaseLV, llvm::Value *Addr) {
878   Address Tmp = Address::invalid();
879   Address TopTmp = Address::invalid();
880   Address MostTopTmp = Address::invalid();
881   BaseTy = BaseTy.getNonReferenceType();
882   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
883          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
884     Tmp = CGF.CreateMemTemp(BaseTy);
885     if (TopTmp.isValid())
886       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
887     else
888       MostTopTmp = Tmp;
889     TopTmp = Tmp;
890     BaseTy = BaseTy->getPointeeType();
891   }
892   llvm::Type *Ty = BaseLV.getPointer()->getType();
893   if (Tmp.isValid())
894     Ty = Tmp.getElementType();
895   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
896   if (Tmp.isValid()) {
897     CGF.Builder.CreateStore(Addr, Tmp);
898     return MostTopTmp;
899   }
900   return Address(Addr, BaseLV.getAlignment());
901 }
902 
903 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
904                           LValue BaseLV) {
905   BaseTy = BaseTy.getNonReferenceType();
906   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
907          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
908     if (auto *PtrTy = BaseTy->getAs<PointerType>())
909       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
910     else {
911       BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
912                                              BaseTy->castAs<ReferenceType>());
913     }
914     BaseTy = BaseTy->getPointeeType();
915   }
916   return CGF.MakeAddrLValue(
917       Address(
918           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
919               BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
920           BaseLV.getAlignment()),
921       BaseLV.getType(), BaseLV.getAlignmentSource());
922 }
923 
/// Register in \a PrivateScope the privatization needed by the 'reduction'
/// clauses of \a D.
///
/// For every reduction item three mappings are added:
///  - the LHS helper variable -> address of the original item,
///  - the original variable   -> a newly emitted, initialized private copy,
///  - the RHS helper variable -> address of that private copy.
/// Four item forms are distinguished: array sections, array subscripts,
/// whole variables of array type, and all other variables.
///
/// \param D Directive whose 'reduction' clauses are processed.
/// \param PrivateScope Scope that receives the mappings; nothing is added if
/// there is no insert point.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // The helper-expression lists are walked in lock step with the clause's
    // variable list (all four iterators advance at the end of the loop body).
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      // Non-null when getReductionInit finds something for this reduction op;
      // presumably a user-defined 'declare reduction' -- TODO confirm against
      // the helper's definition (not visible here).
      auto *DRD = getReductionInit(*IRed);
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        // --- Item is an array section. ---
        // Strip nested array sections and subscripts to reach the base
        // variable reference.
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        // Lvalues of the first and (IsLowerBound=false) last element of the
        // section.
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        // Base of the original variable, loaded through its pointer/reference
        // levels down to the element type.
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        // NOTE(review): 'this' is captured but not used by this lambda.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
              // Number of elements in the section: (UB - LB) + 1.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              // Bind that count to the opaque size expression of the private
              // copy's variable-length array type, then emit the type so the
              // alloca below can be sized.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              // Allocate the private copy and initialize it element-wise,
              // using the reduction op when DRD is set and the private
              // variable's own initializer otherwise.
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                   DRD ? *IRed : Init,
                                   OASELValueLB.getAddress());
              EmitAutoVarCleanups(Emission);
              // Shift the private address by (base - lower bound) and wrap it
              // like the original base, so that (presumably) indexing through
              // the privatized variable with the original indices lands in
              // the private copy.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        // The RHS helper refers to the private copy itself.
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // --- Item is a single array element. ---
        // Strip nested subscripts to reach the base variable reference.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        // NOTE(review): 'this' is captured but not used by this lambda.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue, DRD, IRed]() -> Address {
              // Emit private VarDecl with reduction init.
              AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              // Use the reduction initializer when DRD provides one, or when
              // DRD is set and the private variable has no initializer of
              // its own; otherwise run the variable's regular initializer.
              if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                 ASELValue.getAddress(),
                                                 ASELValue.getType());
              } else
                EmitAutoVarInit(Emission);
              EmitAutoVarCleanups(Emission);
              // Same address shifting and wrapping as in the array-section
              // case, relative to the subscripted element.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        // The RHS helper refers to the private copy, viewed with the RHS
        // helper's own memory type.
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        // --- Item is a whole variable. ---
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // The private copy has array type.
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          // The lambda updates OriginalAddr in place (captured by reference)
          // to the LHS helper's memory type; the aggregate init below reads
          // the updated value.
          PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
                                          LHSVD]() -> Address {
            OriginalAddr = Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
            return OriginalAddr;
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              // Bind the opaque size expression of the private VLA type to
              // the size computed from the original variable's type.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            // Allocate the private array and initialize it element-wise.
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
                                 DRD ? *IRed : Init, OriginalAddr);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          // The RHS helper refers to the private copy, viewed with the RHS
          // helper's own memory type.
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // The private copy does not have array type.
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          // OriginalAddr is filled in by the LHS lambda (by-reference
          // capture) and then copied by value into the lambda that emits the
          // private copy.
          // NOTE(review): this relies on addPrivate running the first lambda
          // before the second one is created -- confirm against addPrivate.
          Address OriginalAddr = Address::invalid();
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
                                          &OriginalAddr]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            OriginalAddr = EmitLValue(&DRE).getAddress();
            return OriginalAddr;
          });
          // Emit reduction copy.
          bool IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
                // Emit private VarDecl with reduction init.
                AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
                auto Addr = Emission.getAllocatedAddress();
                // Same initializer selection as in the array-subscript case.
                if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
                  emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
                                                   OriginalAddr,
                                                   PrivateVD->getType());
                } else
                  EmitAutoVarInit(Emission);
                EmitAutoVarCleanups(Emission);
                return Addr;
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          // The RHS helper refers to the private copy itself.
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      // Advance the helper lists together with the variable list.
      ++ILHS;
      ++IRHS;
      ++IPriv;
      ++IRed;
    }
  }
}
1126 
1127 void CodeGenFunction::EmitOMPReductionClauseFinal(
1128     const OMPExecutableDirective &D) {
1129   if (!HaveInsertPoint())
1130     return;
1131   llvm::SmallVector<const Expr *, 8> Privates;
1132   llvm::SmallVector<const Expr *, 8> LHSExprs;
1133   llvm::SmallVector<const Expr *, 8> RHSExprs;
1134   llvm::SmallVector<const Expr *, 8> ReductionOps;
1135   bool HasAtLeastOneReduction = false;
1136   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1137     HasAtLeastOneReduction = true;
1138     Privates.append(C->privates().begin(), C->privates().end());
1139     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1140     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1141     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1142   }
1143   if (HasAtLeastOneReduction) {
1144     // Emit nowait reduction if nowait clause is present or directive is a
1145     // parallel directive (it always has implicit barrier).
1146     CGM.getOpenMPRuntime().emitReduction(
1147         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1148         D.getSingleClause<OMPNowaitClause>() ||
1149             isOpenMPParallelDirective(D.getDirectiveKind()) ||
1150             D.getDirectiveKind() == OMPD_simd,
1151         D.getDirectiveKind() == OMPD_simd);
1152   }
1153 }
1154 
1155 static void emitPostUpdateForReductionClause(
1156     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1157     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1158   if (!CGF.HaveInsertPoint())
1159     return;
1160   llvm::BasicBlock *DoneBB = nullptr;
1161   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1162     if (auto *PostUpdate = C->getPostUpdateExpr()) {
1163       if (!DoneBB) {
1164         if (auto *Cond = CondGen(CGF)) {
1165           // If the first post-update expression is found, emit conditional
1166           // block if it was requested.
1167           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1168           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1169           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1170           CGF.EmitBlock(ThenBB);
1171         }
1172       }
1173       CGF.EmitIgnoredExpr(PostUpdate);
1174     }
1175   }
1176   if (DoneBB)
1177     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1178 }
1179 
1180 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
1181                                            const OMPExecutableDirective &S,
1182                                            OpenMPDirectiveKind InnermostKind,
1183                                            const RegionCodeGenTy &CodeGen) {
1184   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
1185   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
1186       emitParallelOrTeamsOutlinedFunction(S,
1187           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1188   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1189     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1190     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1191                                          /*IgnoreResultAssign*/ true);
1192     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1193         CGF, NumThreads, NumThreadsClause->getLocStart());
1194   }
1195   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1196     CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1197     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1198         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1199   }
1200   const Expr *IfCond = nullptr;
1201   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1202     if (C->getNameModifier() == OMPD_unknown ||
1203         C->getNameModifier() == OMPD_parallel) {
1204       IfCond = C->getCondition();
1205       break;
1206     }
1207   }
1208 
1209   OMPLexicalScope Scope(CGF, S);
1210   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1211   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1212   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1213                                               CapturedVars, IfCond);
1214 }
1215 
1216 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1217   // Emit parallel region as a standalone region.
1218   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1219     OMPPrivateScope PrivateScope(CGF);
1220     bool Copyins = CGF.EmitOMPCopyinClause(S);
1221     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1222     if (Copyins) {
1223       // Emit implicit barrier to synchronize threads and avoid data races on
1224       // propagation master's thread values of threadprivate variables to local
1225       // instances of that variables of all other implicit threads.
1226       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1227           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1228           /*ForceSimpleCall=*/true);
1229     }
1230     CGF.EmitOMPPrivateClause(S, PrivateScope);
1231     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1232     (void)PrivateScope.Privatize();
1233     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1234     CGF.EmitOMPReductionClauseFinal(S);
1235   };
1236   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
1237   emitPostUpdateForReductionClause(
1238       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1239 }
1240 
1241 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1242                                       JumpDest LoopExit) {
1243   RunCleanupsScope BodyScope(*this);
1244   // Update counters values on current iteration.
1245   for (auto I : D.updates()) {
1246     EmitIgnoredExpr(I);
1247   }
1248   // Update the linear variables.
1249   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1250     for (auto *U : C->updates())
1251       EmitIgnoredExpr(U);
1252   }
1253 
1254   // On a continue in the body, jump to the end.
1255   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1256   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1257   // Emit loop body.
1258   EmitStmt(D.getBody());
1259   // The end (updates/cleanups).
1260   EmitBlock(Continue.getBlock());
1261   BreakContinueStack.pop_back();
1262 }
1263 
1264 void CodeGenFunction::EmitOMPInnerLoop(
1265     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1266     const Expr *IncExpr,
1267     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1268     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1269   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1270 
1271   // Start the loop with a block that tests the condition.
1272   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1273   EmitBlock(CondBlock);
1274   LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());
1275 
1276   // If there are any cleanups between here and the loop-exit scope,
1277   // create a block to stage a loop exit along.
1278   auto ExitBlock = LoopExit.getBlock();
1279   if (RequiresCleanup)
1280     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1281 
1282   auto LoopBody = createBasicBlock("omp.inner.for.body");
1283 
1284   // Emit condition.
1285   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1286   if (ExitBlock != LoopExit.getBlock()) {
1287     EmitBlock(ExitBlock);
1288     EmitBranchThroughCleanup(LoopExit);
1289   }
1290 
1291   EmitBlock(LoopBody);
1292   incrementProfileCounter(&S);
1293 
1294   // Create a block for the increment.
1295   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1296   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1297 
1298   BodyGen(*this);
1299 
1300   // Emit "IV = IV + 1" and a back-edge to the condition block.
1301   EmitBlock(Continue.getBlock());
1302   EmitIgnoredExpr(IncExpr);
1303   PostIncGen(*this);
1304   BreakContinueStack.pop_back();
1305   EmitBranch(CondBlock);
1306   LoopStack.pop();
1307   // Emit the fall-through block.
1308   EmitBlock(LoopExit.getBlock());
1309 }
1310 
1311 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1312   if (!HaveInsertPoint())
1313     return;
1314   // Emit inits for the linear variables.
1315   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1316     for (auto *Init : C->inits()) {
1317       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1318       if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1319         AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1320         auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1321         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1322                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1323                         VD->getInit()->getType(), VK_LValue,
1324                         VD->getInit()->getExprLoc());
1325         EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1326                                                 VD->getType()),
1327                        /*capturedByInit=*/false);
1328         EmitAutoVarCleanups(Emission);
1329       } else
1330         EmitVarDecl(*VD);
1331     }
1332     // Emit the linear steps for the linear clauses.
1333     // If a step is not constant, it is pre-calculated before the loop.
1334     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1335       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1336         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1337         // Emit calculation of the linear step.
1338         EmitIgnoredExpr(CS);
1339       }
1340   }
1341 }
1342 
1343 void CodeGenFunction::EmitOMPLinearClauseFinal(
1344     const OMPLoopDirective &D,
1345     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1346   if (!HaveInsertPoint())
1347     return;
1348   llvm::BasicBlock *DoneBB = nullptr;
1349   // Emit the final values of the linear variables.
1350   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1351     auto IC = C->varlist_begin();
1352     for (auto *F : C->finals()) {
1353       if (!DoneBB) {
1354         if (auto *Cond = CondGen(*this)) {
1355           // If the first post-update expression is found, emit conditional
1356           // block if it was requested.
1357           auto *ThenBB = createBasicBlock(".omp.linear.pu");
1358           DoneBB = createBasicBlock(".omp.linear.pu.done");
1359           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1360           EmitBlock(ThenBB);
1361         }
1362       }
1363       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1364       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1365                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1366                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1367       Address OrigAddr = EmitLValue(&DRE).getAddress();
1368       CodeGenFunction::OMPPrivateScope VarScope(*this);
1369       VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1370       (void)VarScope.Privatize();
1371       EmitIgnoredExpr(F);
1372       ++IC;
1373     }
1374     if (auto *PostUpdate = C->getPostUpdateExpr())
1375       EmitIgnoredExpr(PostUpdate);
1376   }
1377   if (DoneBB)
1378     EmitBlock(DoneBB, /*IsFinished=*/true);
1379 }
1380 
1381 static void emitAlignedClause(CodeGenFunction &CGF,
1382                               const OMPExecutableDirective &D) {
1383   if (!CGF.HaveInsertPoint())
1384     return;
1385   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1386     unsigned ClauseAlignment = 0;
1387     if (auto AlignmentExpr = Clause->getAlignment()) {
1388       auto AlignmentCI =
1389           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1390       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1391     }
1392     for (auto E : Clause->varlists()) {
1393       unsigned Alignment = ClauseAlignment;
1394       if (Alignment == 0) {
1395         // OpenMP [2.8.1, Description]
1396         // If no optional parameter is specified, implementation-defined default
1397         // alignments for SIMD instructions on the target platforms are assumed.
1398         Alignment =
1399             CGF.getContext()
1400                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1401                     E->getType()->getPointeeType()))
1402                 .getQuantity();
1403       }
1404       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1405              "alignment is not power of 2");
1406       if (Alignment != 0) {
1407         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1408         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1409       }
1410     }
1411   }
1412 }
1413 
/// Register the loop counters of \a S and their private copies in
/// \a LoopScope.
///
/// For every counter, the original counter declaration is registered with the
/// address of its private copy. In addition, when the original counter is
/// already present in LocalDeclMap, is found by CapturedStmtInfo->lookup(), or
/// has global storage, the private copy's declaration is registered with the
/// address of the original counter.
///
/// This function only calls addPrivate(); it does not call Privatize() on
/// \a LoopScope itself.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  // Iterates private_counters() in lock-step with counters().
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Original counter -> address of the private copy. The result of
    // addPrivate() is deliberately ignored.
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit var without initialization.
      // The alloca (and its cleanups) is only created if the private copy has
      // no entry in LocalDeclMap yet.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    // Private copy -> address of the original counter, but only when the
    // original is addressable here (local, captured or global).
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        // The reference is flagged as "refers to enclosing variable or
        // capture" when the original is local or captured.
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}
1445 
1446 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1447                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1448                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1449   if (!CGF.HaveInsertPoint())
1450     return;
1451   {
1452     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1453     CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1454     (void)PreCondScope.Privatize();
1455     // Get initial values of real counters.
1456     for (auto I : S.inits()) {
1457       CGF.EmitIgnoredExpr(I);
1458     }
1459   }
1460   // Check that loop is executed at least one time.
1461   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1462 }
1463 
1464 void CodeGenFunction::EmitOMPLinearClause(
1465     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1466   if (!HaveInsertPoint())
1467     return;
1468   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1469   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1470     auto *LoopDirective = cast<OMPLoopDirective>(&D);
1471     for (auto *C : LoopDirective->counters()) {
1472       SIMDLCVs.insert(
1473           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1474     }
1475   }
1476   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1477     auto CurPrivate = C->privates().begin();
1478     for (auto *E : C->varlists()) {
1479       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1480       auto *PrivateVD =
1481           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1482       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1483         bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1484           // Emit private VarDecl with copy init.
1485           EmitVarDecl(*PrivateVD);
1486           return GetAddrOfLocalVar(PrivateVD);
1487         });
1488         assert(IsRegistered && "linear var already registered as private");
1489         // Silence the warning about unused variable.
1490         (void)IsRegistered;
1491       } else
1492         EmitVarDecl(*PrivateVD);
1493       ++CurPrivate;
1494     }
1495   }
1496 }
1497 
1498 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1499                                      const OMPExecutableDirective &D,
1500                                      bool IsMonotonic) {
1501   if (!CGF.HaveInsertPoint())
1502     return;
1503   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1504     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1505                                  /*ignoreResult=*/true);
1506     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1507     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1508     // In presence of finite 'safelen', it may be unsafe to mark all
1509     // the memory instructions parallel, because loop-carried
1510     // dependences of 'safelen' iterations are possible.
1511     if (!IsMonotonic)
1512       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1513   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1514     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1515                                  /*ignoreResult=*/true);
1516     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1517     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1518     // In presence of finite 'safelen', it may be unsafe to mark all
1519     // the memory instructions parallel, because loop-carried
1520     // dependences of 'safelen' iterations are possible.
1521     CGF.LoopStack.setParallel(false);
1522   }
1523 }
1524 
1525 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1526                                       bool IsMonotonic) {
1527   // Walk clauses and process safelen/lastprivate.
1528   LoopStack.setParallel(!IsMonotonic);
1529   LoopStack.setVectorizeEnable(true);
1530   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1531 }
1532 
1533 void CodeGenFunction::EmitOMPSimdFinal(
1534     const OMPLoopDirective &D,
1535     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1536   if (!HaveInsertPoint())
1537     return;
1538   llvm::BasicBlock *DoneBB = nullptr;
1539   auto IC = D.counters().begin();
1540   auto IPC = D.private_counters().begin();
1541   for (auto F : D.finals()) {
1542     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1543     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1544     auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1545     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1546         OrigVD->hasGlobalStorage() || CED) {
1547       if (!DoneBB) {
1548         if (auto *Cond = CondGen(*this)) {
1549           // If the first post-update expression is found, emit conditional
1550           // block if it was requested.
1551           auto *ThenBB = createBasicBlock(".omp.final.then");
1552           DoneBB = createBasicBlock(".omp.final.done");
1553           Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1554           EmitBlock(ThenBB);
1555         }
1556       }
1557       Address OrigAddr = Address::invalid();
1558       if (CED)
1559         OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1560       else {
1561         DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1562                         /*RefersToEnclosingVariableOrCapture=*/false,
1563                         (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1564         OrigAddr = EmitLValue(&DRE).getAddress();
1565       }
1566       OMPPrivateScope VarScope(*this);
1567       VarScope.addPrivate(OrigVD,
1568                           [OrigAddr]() -> Address { return OrigAddr; });
1569       (void)VarScope.Privatize();
1570       EmitIgnoredExpr(F);
1571     }
1572     ++IC;
1573     ++IPC;
1574   }
1575   if (DoneBB)
1576     EmitBlock(DoneBB, /*IsFinished=*/true);
1577 }
1578 
/// Emit code for a '#pragma omp simd' directive.
///
/// The loop is emitted as an inlined directive region (OMPD_simd). Inside the
/// region the pre-condition is checked (or constant-folded), the iteration
/// variable and iteration count are set up, clause handling is emitted, the
/// inner loop is generated, and finally the counter, lastprivate, reduction
/// and linear final updates are emitted.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPLoopScope PreInitScope(CGF, S);
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // Stays null when the pre-condition folds to a constant, in which case no
    // continuation block is needed.
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      // Pre-condition is known to be false: nothing to emit.
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    {
      // All privatization for the loop body lives in this scope; it is closed
      // before the linear final updates below are emitted.
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      CGF.EmitOMPLinearClause(S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      bool HasLastprivateClause =
          CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      // The jump destination passed to the body is default-constructed, and
      // the second callback (run after the increment) is a no-op.
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // The callbacks below return nullptr, i.e. no guard condition is
      // requested for the final updates.
      CGF.EmitOMPSimdFinal(
          S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause)
        CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
      CGF.EmitOMPReductionClauseFinal(S);
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPLinearClauseFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  // The lexical scope must be alive while the inlined region is emitted.
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
1660 
/// Emit the outer ("dispatch") loop that repeatedly obtains a chunk of
/// iterations and runs the inner loop over it.
///
/// \param DynamicOrOrdered When true, the next chunk is requested from the
/// runtime via emitForNext(); when false, the bounds are advanced by the
/// stride after every chunk and emitForStaticFinish() is called at the end.
/// \param IsMonotonic Used to decide whether the loop is marked parallel (or
/// forwarded to EmitOMPSimdInit() for simd directives).
/// \param S The loop directive being emitted.
/// \param LoopScope Private scope of the loop; queried for pending cleanups.
/// \param Ordered When true, emitForOrderedIterationEnd() is emitted through
/// the second callback passed to EmitOMPInnerLoop().
/// \param LB, UB, ST, IL Addresses passed to emitForNext() in the
/// dynamic/ordered case; they are not otherwise referenced in this body.
/// \param Chunk Not referenced in this body.
void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());

  // Condition deciding whether another chunk has to be processed.
  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
                                 LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  // Only taken when a separate cleanup staging block was created above.
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  // First callback emits the loop body; the second one runs after the
  // increment and only emits code for ordered loops.
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  // Back-edge to the dispatch condition.
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());

}
1751 
/// Emit the runtime initialization call for a worksharing loop that needs an
/// outer loop, then emit that outer loop.
///
/// When the loop is ordered or the schedule is dynamic (as reported by
/// RT.isDynamic()), emitForDispatchInit() is used; otherwise
/// emitForStaticInit() is used. In both cases the loop itself is produced by
/// EmitOMPOuterLoop().
///
/// \param ScheduleKind Schedule kind and modifiers of the loop.
/// \param IsMonotonic Forwarded to EmitOMPOuterLoop().
/// \param S The loop directive being emitted.
/// \param LoopScope Private scope of the loop, forwarded to EmitOMPOuterLoop().
/// \param Ordered True if the loop is ordered.
/// \param LB, UB, ST, IL Addresses of the lower bound, upper bound, stride and
/// is-last-iteration helper variables.
/// \param Chunk Chunk size value, or null if none was specified.
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  // A non-ordered, static non-chunked schedule must not reach this function.
  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  // Bit width and signedness of the iteration variable, passed to the runtime
  // init calls below.
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    // The dispatch init call receives the value of the last-iteration
    // expression as its upper bound.
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                         Ordered, IL, LB, UB, ST, Chunk);
  }

  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
                   ST, IL, Chunk);
}
1833 
1834 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1835     OpenMPDistScheduleClauseKind ScheduleKind,
1836     const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
1837     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1838 
1839   auto &RT = CGM.getOpenMPRuntime();
1840 
1841   // Emit outer loop.
1842   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1843   // dynamic
1844   //
1845 
1846   const Expr *IVExpr = S.getIterationVariable();
1847   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1848   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1849 
1850   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
1851                               IVSize, IVSigned, /* Ordered = */ false,
1852                               IL, LB, UB, ST, Chunk);
1853 
1854   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
1855                    S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
1856 }
1857 
1858 /// \brief Emit a helper variable and return corresponding lvalue.
1859 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1860                                const DeclRefExpr *Helper) {
1861   auto VDecl = cast<VarDecl>(Helper->getDecl());
1862   CGF.EmitVarDecl(*VDecl);
1863   return CGF.EmitLValue(Helper);
1864 }
1865 
1866 namespace {
1867   struct ScheduleKindModifiersTy {
1868     OpenMPScheduleClauseKind Kind;
1869     OpenMPScheduleClauseModifier M1;
1870     OpenMPScheduleClauseModifier M2;
1871     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
1872                             OpenMPScheduleClauseModifier M1,
1873                             OpenMPScheduleClauseModifier M2)
1874         : Kind(Kind), M1(M1), M2(M2) {}
1875   };
1876 } // namespace
1877 
1878 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
1879   // Emit the loop iteration variable.
1880   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
1881   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
1882   EmitVarDecl(*IVDecl);
1883 
1884   // Emit the iterations count variable.
1885   // If it is not a variable, Sema decided to calculate iterations count on each
1886   // iteration (e.g., it is foldable into a constant).
1887   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1888     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1889     // Emit calculation of the iterations count.
1890     EmitIgnoredExpr(S.getCalcLastIteration());
1891   }
1892 
1893   auto &RT = CGM.getOpenMPRuntime();
1894 
1895   bool HasLastprivateClause;
1896   // Check pre-condition.
1897   {
1898     OMPLoopScope PreInitScope(*this, S);
1899     // Skip the entire loop if we don't meet the precondition.
1900     // If the condition constant folds and can be elided, avoid emitting the
1901     // whole loop.
1902     bool CondConstant;
1903     llvm::BasicBlock *ContBlock = nullptr;
1904     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1905       if (!CondConstant)
1906         return false;
1907     } else {
1908       auto *ThenBlock = createBasicBlock("omp.precond.then");
1909       ContBlock = createBasicBlock("omp.precond.end");
1910       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
1911                   getProfileCount(&S));
1912       EmitBlock(ThenBlock);
1913       incrementProfileCounter(&S);
1914     }
1915 
1916     bool Ordered = false;
1917     if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
1918       if (OrderedClause->getNumForLoops())
1919         RT.emitDoacrossInit(*this, S);
1920       else
1921         Ordered = true;
1922     }
1923 
1924     llvm::DenseSet<const Expr *> EmittedFinals;
1925     emitAlignedClause(*this, S);
1926     EmitOMPLinearClauseInit(S);
1927     // Emit helper vars inits.
1928     LValue LB =
1929         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
1930     LValue UB =
1931         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
1932     LValue ST =
1933         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
1934     LValue IL =
1935         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
1936 
1937     // Emit 'then' code.
1938     {
1939       OMPPrivateScope LoopScope(*this);
1940       if (EmitOMPFirstprivateClause(S, LoopScope)) {
1941         // Emit implicit barrier to synchronize threads and avoid data races on
1942         // initialization of firstprivate variables and post-update of
1943         // lastprivate variables.
1944         CGM.getOpenMPRuntime().emitBarrierCall(
1945             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1946             /*ForceSimpleCall=*/true);
1947       }
1948       EmitOMPPrivateClause(S, LoopScope);
1949       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
1950       EmitOMPReductionClauseInit(S, LoopScope);
1951       EmitOMPPrivateLoopCounters(S, LoopScope);
1952       EmitOMPLinearClause(S, LoopScope);
1953       (void)LoopScope.Privatize();
1954 
1955       // Detect the loop schedule kind and chunk.
1956       llvm::Value *Chunk = nullptr;
1957       OpenMPScheduleTy ScheduleKind;
1958       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
1959         ScheduleKind.Schedule = C->getScheduleKind();
1960         ScheduleKind.M1 = C->getFirstScheduleModifier();
1961         ScheduleKind.M2 = C->getSecondScheduleModifier();
1962         if (const auto *Ch = C->getChunkSize()) {
1963           Chunk = EmitScalarExpr(Ch);
1964           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
1965                                        S.getIterationVariable()->getType(),
1966                                        S.getLocStart());
1967         }
1968       }
1969       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1970       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1971       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
1972       // If the static schedule kind is specified or if the ordered clause is
1973       // specified, and if no monotonic modifier is specified, the effect will
1974       // be as if the monotonic modifier was specified.
1975       if (RT.isStaticNonchunked(ScheduleKind.Schedule,
1976                                 /* Chunked */ Chunk != nullptr) &&
1977           !Ordered) {
1978         if (isOpenMPSimdDirective(S.getDirectiveKind()))
1979           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
1980         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1981         // When no chunk_size is specified, the iteration space is divided into
1982         // chunks that are approximately equal in size, and at most one chunk is
1983         // distributed to each thread. Note that the size of the chunks is
1984         // unspecified in this case.
1985         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
1986                              IVSize, IVSigned, Ordered,
1987                              IL.getAddress(), LB.getAddress(),
1988                              UB.getAddress(), ST.getAddress());
1989         auto LoopExit =
1990             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
1991         // UB = min(UB, GlobalUB);
1992         EmitIgnoredExpr(S.getEnsureUpperBound());
1993         // IV = LB;
1994         EmitIgnoredExpr(S.getInit());
1995         // while (idx <= UB) { BODY; ++idx; }
1996         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1997                          S.getInc(),
1998                          [&S, LoopExit](CodeGenFunction &CGF) {
1999                            CGF.EmitOMPLoopBody(S, LoopExit);
2000                            CGF.EmitStopPoint(&S);
2001                          },
2002                          [](CodeGenFunction &) {});
2003         EmitBlock(LoopExit.getBlock());
2004         // Tell the runtime we are done.
2005         RT.emitForStaticFinish(*this, S.getLocStart());
2006       } else {
2007         const bool IsMonotonic =
2008             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2009             ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2010             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2011             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2012         // Emit the outer loop, which requests its work chunk [LB..UB] from
2013         // runtime and runs the inner loop to process it.
2014         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2015                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
2016                             IL.getAddress(), Chunk);
2017       }
2018       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2019         EmitOMPSimdFinal(S,
2020                          [&](CodeGenFunction &CGF) -> llvm::Value * {
2021                            return CGF.Builder.CreateIsNotNull(
2022                                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2023                          });
2024       }
2025       EmitOMPReductionClauseFinal(S);
2026       // Emit post-update of the reduction variables if IsLastIter != 0.
2027       emitPostUpdateForReductionClause(
2028           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2029             return CGF.Builder.CreateIsNotNull(
2030                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2031           });
2032       // Emit final copy of the lastprivate variables if IsLastIter != 0.
2033       if (HasLastprivateClause)
2034         EmitOMPLastprivateClauseFinal(
2035             S, isOpenMPSimdDirective(S.getDirectiveKind()),
2036             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2037     }
2038     EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2039       return CGF.Builder.CreateIsNotNull(
2040           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2041     });
2042     // We're now done with the loop, so jump to the continuation block.
2043     if (ContBlock) {
2044       EmitBranch(ContBlock);
2045       EmitBlock(ContBlock, true);
2046     }
2047   }
2048   return HasLastprivateClause;
2049 }
2050 
2051 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2052   bool HasLastprivates = false;
2053   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2054                                           PrePostActionTy &) {
2055     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
2056   };
2057   {
2058     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2059     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2060                                                 S.hasCancel());
2061   }
2062 
2063   // Emit an implicit barrier at the end.
2064   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2065     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2066   }
2067 }
2068 
2069 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2070   bool HasLastprivates = false;
2071   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2072                                           PrePostActionTy &) {
2073     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
2074   };
2075   {
2076     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2077     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2078   }
2079 
2080   // Emit an implicit barrier at the end.
2081   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2082     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2083   }
2084 }
2085 
2086 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2087                                 const Twine &Name,
2088                                 llvm::Value *Init = nullptr) {
2089   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2090   if (Init)
2091     CGF.EmitScalarInit(Init, LVal);
2092   return LVal;
2093 }
2094 
/// Emit the body of a sections region as a statically scheduled loop over
/// section indices whose body is a switch that dispatches to the individual
/// section statements. Called for both 'sections' and 'parallel sections'
/// directives.
///
/// \param S The directive whose associated captured statement holds the
/// sections and whose clauses (firstprivate, private, lastprivate, reduction,
/// nowait) are processed here.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // CS is null when the captured statement is not a compound statement; the
  // whole statement is then emitted as the only section (case 0).
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound: index of the last section (0 for a single
    // non-compound statement).
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Opaque expressions bound to IV and UB so that the synthesized condition
    // and increment below can be emitted through the regular expression
    // codegen.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      // The exit block doubles as the default destination of the switch.
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        // One case per child of the compound statement, numbered in order.
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single non-compound statement: emit it as case 0.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    // Record whether lastprivates exist; the enclosing function reads this
    // after the region has been emitted.
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections' and 'parallel sections' directives can report
  // cancellation here; any other directive kind leaves HasCancel false.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads after the final copy of
    // the lastprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
2229 
2230 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2231   {
2232     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2233     EmitSections(S);
2234   }
2235   // Emit an implicit barrier at the end.
2236   if (!S.getSingleClause<OMPNowaitClause>()) {
2237     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2238                                            OMPD_sections);
2239   }
2240 }
2241 
2242 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2243   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2244     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2245   };
2246   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2247   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2248                                               S.hasCancel());
2249 }
2250 
2251 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2252   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2253   llvm::SmallVector<const Expr *, 8> DestExprs;
2254   llvm::SmallVector<const Expr *, 8> SrcExprs;
2255   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2256   // Check if there are any 'copyprivate' clauses associated with this
2257   // 'single' construct.
2258   // Build a list of copyprivate variables along with helper expressions
2259   // (<source>, <destination>, <destination>=<source> expressions)
2260   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2261     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2262     DestExprs.append(C->destination_exprs().begin(),
2263                      C->destination_exprs().end());
2264     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2265     AssignmentOps.append(C->assignment_ops().begin(),
2266                          C->assignment_ops().end());
2267   }
2268   // Emit code for 'single' region along with 'copyprivate' clauses
2269   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2270     Action.Enter(CGF);
2271     OMPPrivateScope SingleScope(CGF);
2272     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2273     CGF.EmitOMPPrivateClause(S, SingleScope);
2274     (void)SingleScope.Privatize();
2275     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2276   };
2277   {
2278     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2279     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2280                                             CopyprivateVars, DestExprs,
2281                                             SrcExprs, AssignmentOps);
2282   }
2283   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2284   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2285   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2286     CGM.getOpenMPRuntime().emitBarrierCall(
2287         *this, S.getLocStart(),
2288         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2289   }
2290 }
2291 
2292 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2293   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2294     Action.Enter(CGF);
2295     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2296   };
2297   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2298   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2299 }
2300 
2301 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2302   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2303     Action.Enter(CGF);
2304     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2305   };
2306   Expr *Hint = nullptr;
2307   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2308     Hint = HintClause->getHint();
2309   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2310   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2311                                             S.getDirectiveName().getAsString(),
2312                                             CodeGen, S.getLocStart(), Hint);
2313 }
2314 
2315 void CodeGenFunction::EmitOMPParallelForDirective(
2316     const OMPParallelForDirective &S) {
2317   // Emit directive as a combined directive that consists of two implicit
2318   // directives: 'parallel' with 'for' directive.
2319   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2320     CGF.EmitOMPWorksharingLoop(S);
2321   };
2322   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
2323 }
2324 
2325 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2326     const OMPParallelForSimdDirective &S) {
2327   // Emit directive as a combined directive that consists of two implicit
2328   // directives: 'parallel' with 'for' directive.
2329   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2330     CGF.EmitOMPWorksharingLoop(S);
2331   };
2332   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
2333 }
2334 
2335 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2336     const OMPParallelSectionsDirective &S) {
2337   // Emit directive as a combined directive that consists of two implicit
2338   // directives: 'parallel' with 'sections' directive.
2339   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2340     CGF.EmitSections(S);
2341   };
2342   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
2343 }
2344 
/// Common code generation for task-based directives.
///
/// Fills \p Data from the 'final', 'priority', 'private', 'firstprivate',
/// 'lastprivate' and 'depend' clauses of \p S, asks the OpenMP runtime to
/// build the outlined task function, and then hands that function to
/// \p TaskGen.
///
/// \param S The directive; its associated statement must be a CapturedStmt.
/// \param BodyGen Emits the task body inside the outlined function, after the
/// private copies have been mapped.
/// \param TaskGen Called with the outlined function and \p Data to emit the
/// actual task invocation.
/// \param Data Task description; updated in place by this function.
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  // Parameters of the captured declaration used below: I is parameter 0,
  // PartId is parameter 1 and TaskT is parameter 4. Parameters 2 and 3 are
  // loaded inside the CodeGen lambda as the privates pointer and the copy
  // function.
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    // Runtime currently does not support codegen for priority clause argument.
    // TODO: Add codegen for priority clause arg when runtime lib support it.
    auto *Prio = Clause->getPriority();
    // NOTE(review): Prio is a pointer, so this presumably relies on an
    // implicit pointer-to-bool conversion to flag that a priority is present
    // -- confirm against the declaration of OMPTaskDataTy::Priority.
    Data.Priority.setInt(Prio);
    // The priority expression is evaluated and converted to a signed 32-bit
    // integer.
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  // Canonical declarations already recorded, so that a variable listed more
  // than once is only added once.
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  // The set is reset here and then shared by the firstprivate and lastprivate
  // loops below, so a variable recorded as firstprivate is not recorded again
  // as lastprivate.
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  // Maps the declaration referenced by each destination expression to the
  // reference of the original variable; recorded for every list item, even
  // those skipped as duplicates.
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  // Body of the outlined task function: map the private copies, then run
  // BodyGen.
  auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      // For every private, firstprivate and lastprivate variable a temporary
      // holding a pointer is created and passed to CopyFn after PrivatesPtr;
      // the pointers are loaded back after the call and used as the addresses
      // of the private copies.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      // Lastprivate destination declarations are redirected to the address of
      // the corresponding original variable.
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      // Each privatized variable is replaced by the pointer loaded from its
      // temporary, using the alignment of the variable's declaration.
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
2498 
2499 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
2500   // Emit outlined function for task construct.
2501   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2502   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
2503   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2504   const Expr *IfCond = nullptr;
2505   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2506     if (C->getNameModifier() == OMPD_unknown ||
2507         C->getNameModifier() == OMPD_task) {
2508       IfCond = C->getCondition();
2509       break;
2510     }
2511   }
2512 
2513   OMPTaskDataTy Data;
2514   // Check if we should emit tied or untied task.
2515   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
2516   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
2517     CGF.EmitStmt(CS->getCapturedStmt());
2518   };
2519   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
2520                     IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
2521                             const OMPTaskDataTy &Data) {
2522     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
2523                                             SharedsTy, CapturedStruct, IfCond,
2524                                             Data);
2525   };
2526   EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
2527 }
2528 
2529 void CodeGenFunction::EmitOMPTaskyieldDirective(
2530     const OMPTaskyieldDirective &S) {
2531   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2532 }
2533 
2534 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2535   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2536 }
2537 
2538 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2539   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2540 }
2541 
2542 void CodeGenFunction::EmitOMPTaskgroupDirective(
2543     const OMPTaskgroupDirective &S) {
2544   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2545     Action.Enter(CGF);
2546     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2547   };
2548   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2549   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2550 }
2551 
2552 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2553   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2554     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2555       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2556                                 FlushClause->varlist_end());
2557     }
2558     return llvm::None;
2559   }(), S.getLocStart());
2560 }
2561 
/// Emit the loop of a 'distribute' directive.
///
/// Emits the iteration variable and (when it is a variable) the iteration
/// count, guards the loop with its precondition, and then either emits a
/// static non-chunked loop directly or delegates to
/// EmitOMPDistributeOuterLoop for the remaining schedules.
///
/// \param S The 'distribute' directive providing the loop helper expressions
/// and the optional 'dist_schedule' clause.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    // ContBlock stays null when the precondition folds to a constant; no
    // branch to a continuation block is needed in that case.
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // The chunk size is converted to the type of the iteration variable.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, /* Ordered = */ false,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        // Jump destination handed to the loop body emitter; its block is
        // emitted right after the inner loop.
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
2677 
2678 void CodeGenFunction::EmitOMPDistributeDirective(
2679     const OMPDistributeDirective &S) {
2680   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2681     CGF.EmitOMPDistributeLoop(S);
2682   };
2683   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2684   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2685                                               false);
2686 }
2687 
2688 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
2689                                                    const CapturedStmt *S) {
2690   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
2691   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
2692   CGF.CapturedStmtInfo = &CapStmtInfo;
2693   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
2694   Fn->addFnAttr(llvm::Attribute::NoInline);
2695   return Fn;
2696 }
2697 
2698 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
2699   if (!S.getAssociatedStmt()) {
2700     for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
2701       CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
2702     return;
2703   }
2704   auto *C = S.getSingleClause<OMPSIMDClause>();
2705   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
2706                                  PrePostActionTy &Action) {
2707     if (C) {
2708       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2709       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2710       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
2711       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
2712       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
2713     } else {
2714       Action.Enter(CGF);
2715       CGF.EmitStmt(
2716           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2717     }
2718   };
2719   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2720   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
2721 }
2722 
2723 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
2724                                          QualType SrcType, QualType DestType,
2725                                          SourceLocation Loc) {
2726   assert(CGF.hasScalarEvaluationKind(DestType) &&
2727          "DestType must have scalar evaluation kind.");
2728   assert(!Val.isAggregate() && "Must be a scalar or complex.");
2729   return Val.isScalar()
2730              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
2731                                         Loc)
2732              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
2733                                                  DestType, Loc);
2734 }
2735 
2736 static CodeGenFunction::ComplexPairTy
2737 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
2738                       QualType DestType, SourceLocation Loc) {
2739   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
2740          "DestType must have complex evaluation kind.");
2741   CodeGenFunction::ComplexPairTy ComplexVal;
2742   if (Val.isScalar()) {
2743     // Convert the input element to the element type of the complex.
2744     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2745     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
2746                                               DestElementType, Loc);
2747     ComplexVal = CodeGenFunction::ComplexPairTy(
2748         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
2749   } else {
2750     assert(Val.isComplex() && "Must be a scalar or complex.");
2751     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
2752     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2753     ComplexVal.first = CGF.EmitScalarConversion(
2754         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
2755     ComplexVal.second = CGF.EmitScalarConversion(
2756         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
2757   }
2758   return ComplexVal;
2759 }
2760 
2761 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
2762                                   LValue LVal, RValue RVal) {
2763   if (LVal.isGlobalReg()) {
2764     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
2765   } else {
2766     CGF.EmitAtomicStore(RVal, LVal,
2767                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2768                                  : llvm::AtomicOrdering::Monotonic,
2769                         LVal.isVolatile(), /*IsInit=*/false);
2770   }
2771 }
2772 
2773 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
2774                                          QualType RValTy, SourceLocation Loc) {
2775   switch (getEvaluationKind(LVal.getType())) {
2776   case TEK_Scalar:
2777     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
2778                                *this, RVal, RValTy, LVal.getType(), Loc)),
2779                            LVal);
2780     break;
2781   case TEK_Complex:
2782     EmitStoreOfComplex(
2783         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
2784         /*isInit=*/false);
2785     break;
2786   case TEK_Aggregate:
2787     llvm_unreachable("Must be a scalar or complex.");
2788   }
2789 }
2790 
2791 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
2792                                   const Expr *X, const Expr *V,
2793                                   SourceLocation Loc) {
2794   // v = x;
2795   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
2796   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
2797   LValue XLValue = CGF.EmitLValue(X);
2798   LValue VLValue = CGF.EmitLValue(V);
2799   RValue Res = XLValue.isGlobalReg()
2800                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
2801                    : CGF.EmitAtomicLoad(
2802                          XLValue, Loc,
2803                          IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2804                                   : llvm::AtomicOrdering::Monotonic,
2805                          XLValue.isVolatile());
2806   // OpenMP, 2.12.6, atomic Construct
2807   // Any atomic construct with a seq_cst clause forces the atomically
2808   // performed operation to include an implicit flush operation without a
2809   // list.
2810   if (IsSeqCst)
2811     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2812   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
2813 }
2814 
2815 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
2816                                    const Expr *X, const Expr *E,
2817                                    SourceLocation Loc) {
2818   // x = expr;
2819   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
2820   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
2821   // OpenMP, 2.12.6, atomic Construct
2822   // Any atomic construct with a seq_cst clause forces the atomically
2823   // performed operation to include an implicit flush operation without a
2824   // list.
2825   if (IsSeqCst)
2826     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2827 }
2828 
2829 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
2830                                                 RValue Update,
2831                                                 BinaryOperatorKind BO,
2832                                                 llvm::AtomicOrdering AO,
2833                                                 bool IsXLHSInRHSPart) {
2834   auto &Context = CGF.CGM.getContext();
2835   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
2836   // expression is simple and atomic is allowed for the given type for the
2837   // target platform.
2838   if (BO == BO_Comma || !Update.isScalar() ||
2839       !Update.getScalarVal()->getType()->isIntegerTy() ||
2840       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
2841                         (Update.getScalarVal()->getType() !=
2842                          X.getAddress().getElementType())) ||
2843       !X.getAddress().getElementType()->isIntegerTy() ||
2844       !Context.getTargetInfo().hasBuiltinAtomic(
2845           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
2846     return std::make_pair(false, RValue::get(nullptr));
2847 
2848   llvm::AtomicRMWInst::BinOp RMWOp;
2849   switch (BO) {
2850   case BO_Add:
2851     RMWOp = llvm::AtomicRMWInst::Add;
2852     break;
2853   case BO_Sub:
2854     if (!IsXLHSInRHSPart)
2855       return std::make_pair(false, RValue::get(nullptr));
2856     RMWOp = llvm::AtomicRMWInst::Sub;
2857     break;
2858   case BO_And:
2859     RMWOp = llvm::AtomicRMWInst::And;
2860     break;
2861   case BO_Or:
2862     RMWOp = llvm::AtomicRMWInst::Or;
2863     break;
2864   case BO_Xor:
2865     RMWOp = llvm::AtomicRMWInst::Xor;
2866     break;
2867   case BO_LT:
2868     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2869                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
2870                                    : llvm::AtomicRMWInst::Max)
2871                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
2872                                    : llvm::AtomicRMWInst::UMax);
2873     break;
2874   case BO_GT:
2875     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2876                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
2877                                    : llvm::AtomicRMWInst::Min)
2878                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
2879                                    : llvm::AtomicRMWInst::UMin);
2880     break;
2881   case BO_Assign:
2882     RMWOp = llvm::AtomicRMWInst::Xchg;
2883     break;
2884   case BO_Mul:
2885   case BO_Div:
2886   case BO_Rem:
2887   case BO_Shl:
2888   case BO_Shr:
2889   case BO_LAnd:
2890   case BO_LOr:
2891     return std::make_pair(false, RValue::get(nullptr));
2892   case BO_PtrMemD:
2893   case BO_PtrMemI:
2894   case BO_LE:
2895   case BO_GE:
2896   case BO_EQ:
2897   case BO_NE:
2898   case BO_AddAssign:
2899   case BO_SubAssign:
2900   case BO_AndAssign:
2901   case BO_OrAssign:
2902   case BO_XorAssign:
2903   case BO_MulAssign:
2904   case BO_DivAssign:
2905   case BO_RemAssign:
2906   case BO_ShlAssign:
2907   case BO_ShrAssign:
2908   case BO_Comma:
2909     llvm_unreachable("Unsupported atomic update operation");
2910   }
2911   auto *UpdateVal = Update.getScalarVal();
2912   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
2913     UpdateVal = CGF.Builder.CreateIntCast(
2914         IC, X.getAddress().getElementType(),
2915         X.getType()->hasSignedIntegerRepresentation());
2916   }
2917   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
2918   return std::make_pair(true, RValue::get(Res));
2919 }
2920 
2921 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
2922     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
2923     llvm::AtomicOrdering AO, SourceLocation Loc,
2924     const llvm::function_ref<RValue(RValue)> &CommonGen) {
2925   // Update expressions are allowed to have the following forms:
2926   // x binop= expr; -> xrval + expr;
2927   // x++, ++x -> xrval + 1;
2928   // x--, --x -> xrval - 1;
2929   // x = x binop expr; -> xrval binop expr
2930   // x = expr Op x; - > expr binop xrval;
2931   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
2932   if (!Res.first) {
2933     if (X.isGlobalReg()) {
2934       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
2935       // 'xrval'.
2936       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
2937     } else {
2938       // Perform compare-and-swap procedure.
2939       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
2940     }
2941   }
2942   return Res;
2943 }
2944 
2945 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
2946                                     const Expr *X, const Expr *E,
2947                                     const Expr *UE, bool IsXLHSInRHSPart,
2948                                     SourceLocation Loc) {
2949   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2950          "Update expr in 'atomic update' must be a binary operator.");
2951   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2952   // Update expressions are allowed to have the following forms:
2953   // x binop= expr; -> xrval + expr;
2954   // x++, ++x -> xrval + 1;
2955   // x--, --x -> xrval - 1;
2956   // x = x binop expr; -> xrval binop expr
2957   // x = expr Op x; - > expr binop xrval;
2958   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
2959   LValue XLValue = CGF.EmitLValue(X);
2960   RValue ExprRValue = CGF.EmitAnyExpr(E);
2961   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
2962                      : llvm::AtomicOrdering::Monotonic;
2963   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2964   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2965   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2966   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2967   auto Gen =
2968       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
2969         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2970         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2971         return CGF.EmitAnyExpr(UE);
2972       };
2973   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
2974       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2975   // OpenMP, 2.12.6, atomic Construct
2976   // Any atomic construct with a seq_cst clause forces the atomically
2977   // performed operation to include an implicit flush operation without a
2978   // list.
2979   if (IsSeqCst)
2980     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2981 }
2982 
2983 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
2984                             QualType SourceType, QualType ResType,
2985                             SourceLocation Loc) {
2986   switch (CGF.getEvaluationKind(ResType)) {
2987   case TEK_Scalar:
2988     return RValue::get(
2989         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
2990   case TEK_Complex: {
2991     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
2992     return RValue::getComplex(Res.first, Res.second);
2993   }
2994   case TEK_Aggregate:
2995     break;
2996   }
2997   llvm_unreachable("Must be a scalar or complex.");
2998 }
2999 
3000 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3001                                      bool IsPostfixUpdate, const Expr *V,
3002                                      const Expr *X, const Expr *E,
3003                                      const Expr *UE, bool IsXLHSInRHSPart,
3004                                      SourceLocation Loc) {
3005   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3006   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3007   RValue NewVVal;
3008   LValue VLValue = CGF.EmitLValue(V);
3009   LValue XLValue = CGF.EmitLValue(X);
3010   RValue ExprRValue = CGF.EmitAnyExpr(E);
3011   auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3012                      : llvm::AtomicOrdering::Monotonic;
3013   QualType NewVValType;
3014   if (UE) {
3015     // 'x' is updated with some additional value.
3016     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3017            "Update expr in 'atomic capture' must be a binary operator.");
3018     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3019     // Update expressions are allowed to have the following forms:
3020     // x binop= expr; -> xrval + expr;
3021     // x++, ++x -> xrval + 1;
3022     // x--, --x -> xrval - 1;
3023     // x = x binop expr; -> xrval binop expr
3024     // x = expr Op x; - > expr binop xrval;
3025     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3026     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3027     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3028     NewVValType = XRValExpr->getType();
3029     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3030     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3031                   IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
3032       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3033       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3034       RValue Res = CGF.EmitAnyExpr(UE);
3035       NewVVal = IsPostfixUpdate ? XRValue : Res;
3036       return Res;
3037     };
3038     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3039         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3040     if (Res.first) {
3041       // 'atomicrmw' instruction was generated.
3042       if (IsPostfixUpdate) {
3043         // Use old value from 'atomicrmw'.
3044         NewVVal = Res.second;
3045       } else {
3046         // 'atomicrmw' does not provide new value, so evaluate it using old
3047         // value of 'x'.
3048         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3049         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3050         NewVVal = CGF.EmitAnyExpr(UE);
3051       }
3052     }
3053   } else {
3054     // 'x' is simply rewritten with some 'expr'.
3055     NewVValType = X->getType().getNonReferenceType();
3056     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3057                                X->getType().getNonReferenceType(), Loc);
3058     auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
3059       NewVVal = XRValue;
3060       return ExprRValue;
3061     };
3062     // Try to perform atomicrmw xchg, otherwise simple exchange.
3063     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3064         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3065         Loc, Gen);
3066     if (Res.first) {
3067       // 'atomicrmw' instruction was generated.
3068       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3069     }
3070   }
3071   // Emit post-update store to 'v' of old/new 'x' value.
3072   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3073   // OpenMP, 2.12.6, atomic Construct
3074   // Any atomic construct with a seq_cst clause forces the atomically
3075   // performed operation to include an implicit flush operation without a
3076   // list.
3077   if (IsSeqCst)
3078     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3079 }
3080 
3081 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3082                               bool IsSeqCst, bool IsPostfixUpdate,
3083                               const Expr *X, const Expr *V, const Expr *E,
3084                               const Expr *UE, bool IsXLHSInRHSPart,
3085                               SourceLocation Loc) {
3086   switch (Kind) {
3087   case OMPC_read:
3088     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3089     break;
3090   case OMPC_write:
3091     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3092     break;
3093   case OMPC_unknown:
3094   case OMPC_update:
3095     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3096     break;
3097   case OMPC_capture:
3098     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3099                              IsXLHSInRHSPart, Loc);
3100     break;
3101   case OMPC_if:
3102   case OMPC_final:
3103   case OMPC_num_threads:
3104   case OMPC_private:
3105   case OMPC_firstprivate:
3106   case OMPC_lastprivate:
3107   case OMPC_reduction:
3108   case OMPC_safelen:
3109   case OMPC_simdlen:
3110   case OMPC_collapse:
3111   case OMPC_default:
3112   case OMPC_seq_cst:
3113   case OMPC_shared:
3114   case OMPC_linear:
3115   case OMPC_aligned:
3116   case OMPC_copyin:
3117   case OMPC_copyprivate:
3118   case OMPC_flush:
3119   case OMPC_proc_bind:
3120   case OMPC_schedule:
3121   case OMPC_ordered:
3122   case OMPC_nowait:
3123   case OMPC_untied:
3124   case OMPC_threadprivate:
3125   case OMPC_depend:
3126   case OMPC_mergeable:
3127   case OMPC_device:
3128   case OMPC_threads:
3129   case OMPC_simd:
3130   case OMPC_map:
3131   case OMPC_num_teams:
3132   case OMPC_thread_limit:
3133   case OMPC_priority:
3134   case OMPC_grainsize:
3135   case OMPC_nogroup:
3136   case OMPC_num_tasks:
3137   case OMPC_hint:
3138   case OMPC_dist_schedule:
3139   case OMPC_defaultmap:
3140   case OMPC_uniform:
3141   case OMPC_to:
3142   case OMPC_from:
3143     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
3144   }
3145 }
3146 
3147 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3148   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3149   OpenMPClauseKind Kind = OMPC_unknown;
3150   for (auto *C : S.clauses()) {
3151     // Find first clause (skip seq_cst clause, if it is first).
3152     if (C->getClauseKind() != OMPC_seq_cst) {
3153       Kind = C->getClauseKind();
3154       break;
3155     }
3156   }
3157 
3158   const auto *CS =
3159       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3160   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3161     enterFullExpression(EWC);
3162   }
3163   // Processing for statements under 'atomic capture'.
3164   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3165     for (const auto *C : Compound->body()) {
3166       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3167         enterFullExpression(EWC);
3168       }
3169     }
3170   }
3171 
3172   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3173                                             PrePostActionTy &) {
3174     CGF.EmitStopPoint(CS);
3175     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3176                       S.getV(), S.getExpr(), S.getUpdateExpr(),
3177                       S.isXLHSInRHSPart(), S.getLocStart());
3178   };
3179   OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3180   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
3181 }
3182 
3183 std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
3184 CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
3185     CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
3186     bool IsOffloadEntry) {
3187   llvm::Function *OutlinedFn = nullptr;
3188   llvm::Constant *OutlinedFnID = nullptr;
3189   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3190     OMPPrivateScope PrivateScope(CGF);
3191     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3192     CGF.EmitOMPPrivateClause(S, PrivateScope);
3193     (void)PrivateScope.Privatize();
3194 
3195     Action.Enter(CGF);
3196     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3197   };
3198   // Emit target region as a standalone region.
3199   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3200       S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
3201   return std::make_pair(OutlinedFn, OutlinedFnID);
3202 }
3203 
3204 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3205   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3206 
3207   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3208   GenerateOpenMPCapturedVars(CS, CapturedVars);
3209 
3210   llvm::Function *Fn = nullptr;
3211   llvm::Constant *FnID = nullptr;
3212 
3213   // Check if we have any if clause associated with the directive.
3214   const Expr *IfCond = nullptr;
3215 
3216   if (auto *C = S.getSingleClause<OMPIfClause>()) {
3217     IfCond = C->getCondition();
3218   }
3219 
3220   // Check if we have any device clause associated with the directive.
3221   const Expr *Device = nullptr;
3222   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3223     Device = C->getDevice();
3224   }
3225 
3226   // Check if we have an if clause whose conditional always evaluates to false
3227   // or if we do not have any targets specified. If so the target region is not
3228   // an offload entry point.
3229   bool IsOffloadEntry = true;
3230   if (IfCond) {
3231     bool Val;
3232     if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3233       IsOffloadEntry = false;
3234   }
3235   if (CGM.getLangOpts().OMPTargetTriples.empty())
3236     IsOffloadEntry = false;
3237 
3238   assert(CurFuncDecl && "No parent declaration for target region!");
3239   StringRef ParentName;
3240   // In case we have Ctors/Dtors we use the complete type variant to produce
3241   // the mangling of the device outlined kernel.
3242   if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
3243     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3244   else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
3245     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3246   else
3247     ParentName =
3248         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
3249 
3250   std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
3251       CGM, S, ParentName, IsOffloadEntry);
3252   OMPLexicalScope Scope(*this, S);
3253   CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
3254                                         CapturedVars);
3255 }
3256 
3257 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3258                                         const OMPExecutableDirective &S,
3259                                         OpenMPDirectiveKind InnermostKind,
3260                                         const RegionCodeGenTy &CodeGen) {
3261   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3262   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
3263       emitParallelOrTeamsOutlinedFunction(S,
3264           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3265 
3266   const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
3267   const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
3268   const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
3269   if (NT || TL) {
3270     Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3271     Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3272 
3273     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3274                                                   S.getLocStart());
3275   }
3276 
3277   OMPLexicalScope Scope(CGF, S);
3278   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3279   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3280   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3281                                            CapturedVars);
3282 }
3283 
3284 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3285   // Emit parallel region as a standalone region.
3286   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3287     OMPPrivateScope PrivateScope(CGF);
3288     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3289     CGF.EmitOMPPrivateClause(S, PrivateScope);
3290     (void)PrivateScope.Privatize();
3291     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3292   };
3293   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3294 }
3295 
3296 void CodeGenFunction::EmitOMPCancellationPointDirective(
3297     const OMPCancellationPointDirective &S) {
3298   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3299                                                    S.getCancelRegion());
3300 }
3301 
3302 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3303   const Expr *IfCond = nullptr;
3304   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3305     if (C->getNameModifier() == OMPD_unknown ||
3306         C->getNameModifier() == OMPD_cancel) {
3307       IfCond = C->getCondition();
3308       break;
3309     }
3310   }
3311   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3312                                         S.getCancelRegion());
3313 }
3314 
3315 CodeGenFunction::JumpDest
3316 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3317   if (Kind == OMPD_parallel || Kind == OMPD_task)
3318     return ReturnBlock;
3319   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3320          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
3321   return BreakContinueStack.back().BreakBlock;
3322 }
3323 
3324 // Generate the instructions for '#pragma omp target data' directive.
3325 void CodeGenFunction::EmitOMPTargetDataDirective(
3326     const OMPTargetDataDirective &S) {
3327   // The target data enclosed region is implemented just by emitting the
3328   // statement.
3329   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3330     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3331   };
3332 
3333   // If we don't have target devices, don't bother emitting the data mapping
3334   // code.
3335   if (CGM.getLangOpts().OMPTargetTriples.empty()) {
3336     OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3337 
3338     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data,
3339                                                 CodeGen);
3340     return;
3341   }
3342 
3343   // Check if we have any if clause associated with the directive.
3344   const Expr *IfCond = nullptr;
3345   if (auto *C = S.getSingleClause<OMPIfClause>())
3346     IfCond = C->getCondition();
3347 
3348   // Check if we have any device clause associated with the directive.
3349   const Expr *Device = nullptr;
3350   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3351     Device = C->getDevice();
3352 
3353   CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen);
3354 }
3355 
3356 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
3357     const OMPTargetEnterDataDirective &S) {
3358   // If we don't have target devices, don't bother emitting the data mapping
3359   // code.
3360   if (CGM.getLangOpts().OMPTargetTriples.empty())
3361     return;
3362 
3363   // Check if we have any if clause associated with the directive.
3364   const Expr *IfCond = nullptr;
3365   if (auto *C = S.getSingleClause<OMPIfClause>())
3366     IfCond = C->getCondition();
3367 
3368   // Check if we have any device clause associated with the directive.
3369   const Expr *Device = nullptr;
3370   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3371     Device = C->getDevice();
3372 
3373   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3374 }
3375 
3376 void CodeGenFunction::EmitOMPTargetExitDataDirective(
3377     const OMPTargetExitDataDirective &S) {
3378   // If we don't have target devices, don't bother emitting the data mapping
3379   // code.
3380   if (CGM.getLangOpts().OMPTargetTriples.empty())
3381     return;
3382 
3383   // Check if we have any if clause associated with the directive.
3384   const Expr *IfCond = nullptr;
3385   if (auto *C = S.getSingleClause<OMPIfClause>())
3386     IfCond = C->getCondition();
3387 
3388   // Check if we have any device clause associated with the directive.
3389   const Expr *Device = nullptr;
3390   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3391     Device = C->getDevice();
3392 
3393   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3394 }
3395 
/// Placeholder for the 'target parallel' directive: no code is emitted for it
/// yet, so the directive \p S is currently ignored here.
void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  // TODO: codegen for target parallel.
}
3400 
/// Placeholder for the 'target parallel for' directive: no code is emitted
/// for it yet, so the directive \p S is currently ignored here.
void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}
3405 
3406 /// Emit a helper variable and return corresponding lvalue.
3407 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
3408                      const ImplicitParamDecl *PVD,
3409                      CodeGenFunction::OMPPrivateScope &Privates) {
3410   auto *VDecl = cast<VarDecl>(Helper->getDecl());
3411   Privates.addPrivate(
3412       VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
3413 }
3414 
/// Emit code for a taskloop-based loop directive \p S (the directive kind is
/// asserted to satisfy isOpenMPTaskLoopDirective).
///
/// The function gathers the captured statement, the applicable 'if' clause
/// condition and the task data (nogroup, tied, grainsize/num_tasks), then
/// builds two callbacks: BodyGen, which emits the loop inside the task body,
/// and TaskGen, which emits the runtime taskloop call. Both are handed to
/// EmitOMPTaskBasedDirective together with the collected task data.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Use the condition of the first 'if' clause that either has no directive
  // name modifier or is explicitly tagged with 'taskloop'.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  // The clause pointer converts to true when a 'nogroup' clause is present.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop
  // The integer part of Data.Schedule records which clause supplied the value
  // (false for grainsize, true for num_tasks); the pointer part holds the
  // emitted scalar value. When neither clause is present, Data.Schedule is
  // left as default-constructed.
  if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Callback that emits the loop itself as the body of the task.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    // NOTE(review): presumably emits the pre-init statements the loop
    // directive needs before the precondition is evaluated - confirm against
    // the OMPLoopScope definition.
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      // Precondition is a known constant: a false value means there is nothing
      // to emit; a true value needs no branch (ContBlock stays null).
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // Positions of the lower bound, upper bound, stride and last-iteration
    // parameters within the captured declaration's parameter list (5..8).
    // NOTE(review): presumably these must stay in sync with the parameter
    // layout of the taskloop captured region declared elsewhere - confirm.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    // Redirect the directive's helper variables to the storage of the
    // corresponding parameters of the captured function.
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    // The result of Privatize() is deliberately ignored.
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the loop: the first callback emits the loop body followed by a
    // stop point for the directive; the second callback is a no-op.
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    // ContBlock is only set when a runtime precondition check was emitted.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    // The flag is loaded from the last-iteration parameter (*LIP) and tested
    // for being non-null.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  // Callback that emits the runtime taskloop call for the outlined function.
  // The call is wrapped in an inlined 'taskloop' directive region.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}
3542 
/// Emit code for the 'taskloop' directive \p S by forwarding to the common
/// taskloop-based emission routine.
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
3546 
/// Emit code for the 'taskloop simd' directive \p S by forwarding to the
/// common taskloop-based emission routine, which applies the simd-specific
/// steps based on the directive kind.
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}
3551 
3552 // Generate the instructions for '#pragma omp target update' directive.
3553 void CodeGenFunction::EmitOMPTargetUpdateDirective(
3554     const OMPTargetUpdateDirective &S) {
3555   // If we don't have target devices, don't bother emitting the data mapping
3556   // code.
3557   if (CGM.getLangOpts().OMPTargetTriples.empty())
3558     return;
3559 
3560   // Check if we have any if clause associated with the directive.
3561   const Expr *IfCond = nullptr;
3562   if (auto *C = S.getSingleClause<OMPIfClause>())
3563     IfCond = C->getCondition();
3564 
3565   // Check if we have any device clause associated with the directive.
3566   const Expr *Device = nullptr;
3567   if (auto *C = S.getSingleClause<OMPDeviceClause>())
3568     Device = C->getDevice();
3569 
3570   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
3571 }
3572