1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 using namespace clang;
23 using namespace CodeGen;
24 
25 namespace {
26 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
27 /// for captured expressions.
28 class OMPLexicalScope {
29   CodeGenFunction::LexicalScope Scope;
30   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
31     for (const auto *C : S.clauses()) {
32       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34           for (const auto *I : PreInit->decls()) {
35             if (!I->hasAttr<OMPCaptureNoInitAttr>())
36               CGF.EmitVarDecl(cast<VarDecl>(*I));
37             else {
38               CodeGenFunction::AutoVarEmission Emission =
39                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40               CGF.EmitAutoVarCleanups(Emission);
41             }
42           }
43         }
44       }
45     }
46   }
47 
48 public:
49   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
50       : Scope(CGF, S.getSourceRange()) {
51     emitPreInitStmt(CGF, S);
52   }
53 };
54 } // namespace
55 
56 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
57   auto &C = getContext();
58   llvm::Value *Size = nullptr;
59   auto SizeInChars = C.getTypeSizeInChars(Ty);
60   if (SizeInChars.isZero()) {
61     // getTypeSizeInChars() returns 0 for a VLA.
62     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
63       llvm::Value *ArraySize;
64       std::tie(ArraySize, Ty) = getVLASize(VAT);
65       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
66     }
67     SizeInChars = C.getTypeSizeInChars(Ty);
68     if (SizeInChars.isZero())
69       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
70     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
71   } else
72     Size = CGM.getSize(SizeInChars);
73   return Size;
74 }
75 
76 void CodeGenFunction::GenerateOpenMPCapturedVars(
77     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
78   const RecordDecl *RD = S.getCapturedRecordDecl();
79   auto CurField = RD->field_begin();
80   auto CurCap = S.captures().begin();
81   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
82                                                  E = S.capture_init_end();
83        I != E; ++I, ++CurField, ++CurCap) {
84     if (CurField->hasCapturedVLAType()) {
85       auto VAT = CurField->getCapturedVLAType();
86       auto *Val = VLASizeMap[VAT->getSizeExpr()];
87       CapturedVars.push_back(Val);
88     } else if (CurCap->capturesThis())
89       CapturedVars.push_back(CXXThisValue);
90     else if (CurCap->capturesVariableByCopy())
91       CapturedVars.push_back(
92           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
93     else {
94       assert(CurCap->capturesVariable() && "Expected capture by reference.");
95       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
96     }
97   }
98 }
99 
100 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
101                                     StringRef Name, LValue AddrLV,
102                                     bool isReferenceType = false) {
103   ASTContext &Ctx = CGF.getContext();
104 
105   auto *CastedPtr = CGF.EmitScalarConversion(
106       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
107       Ctx.getPointerType(DstType), SourceLocation());
108   auto TmpAddr =
109       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
110           .getAddress();
111 
112   // If we are dealing with references we need to return the address of the
113   // reference instead of the reference of the value.
114   if (isReferenceType) {
115     QualType RefType = Ctx.getLValueReferenceType(DstType);
116     auto *RefVal = TmpAddr.getPointer();
117     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
118     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
119     CGF.EmitScalarInit(RefVal, TmpLVal);
120   }
121 
122   return TmpAddr;
123 }
124 
/// Generates the outlined function for the OpenMP captured statement \p S:
/// builds an argument list mirroring the captured record's fields (widening
/// by-copy non-pointer captures and VLA sizes to uintptr so the runtime can
/// handle them as pointers), creates and emits the function, rebinds each
/// captured entity inside it, and emits the captured body.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  // Parameters declared before the context parameter are passed through as-is.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    // Pick the parameter name: the captured variable's identifier, "this"
    // for this-captures, or "vla" for variable-array-type size captures.
    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  // Parameters declared after the context parameter are also passed through.
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  // Walk fields, captures, and the freshly built Args in lockstep (Cnt indexes
  // Args starting at the context parameter position) and bind each capture to
  // its incoming argument.
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // VLA sizes arrive as uintptr arguments; cast back to the field type
      // and record the value so VLA type emission can look it up.
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      // Capture by reference: the argument holds the variable's address;
      // load through it unless the variable is itself a reference.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      // Non-pointer by-copy captures were widened to uintptr above; cast the
      // argument slot back to the variable's type.
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(I->getCapturedVar(),
                        castValueFromUintptr(*this, FD->getType(),
                                             Args[Cnt]->getName(), ArgLVal,
                                             VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt;
    ++I;
  }

  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}
248 
249 //===----------------------------------------------------------------------===//
250 //                              OpenMP Directive Emission
251 //===----------------------------------------------------------------------===//
/// Emits an element-by-element copy loop over the arrays at \p DestAddr and
/// \p SrcAddr of array type \p OriginalType, calling \p CopyGen with the
/// current (destination, source) element addresses on each iteration.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty.
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointer across
  // iterations; the back-edge incoming values are added after the increment.
  llvm::PHINode *SrcElementPHI =
    Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI =
    Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
    Address(DestElementPHI,
            DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
312 
/// \brief Emit initialization of arrays of complex types.
/// \param CGF The current CodeGenFunction used for emission.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, const Expr *Init) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());

  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely when the array is empty.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI node carries the current destination element pointer across
  // iterations; the back-edge incoming value is added after the increment.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope cleans up any temporaries created while emitting the initializer.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                         /*IsInitializer=*/false);
  }

  // Shift the address forward by one element.
  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
370 
371 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
372                                   Address SrcAddr, const VarDecl *DestVD,
373                                   const VarDecl *SrcVD, const Expr *Copy) {
374   if (OriginalType->isArrayType()) {
375     auto *BO = dyn_cast<BinaryOperator>(Copy);
376     if (BO && BO->getOpcode() == BO_Assign) {
377       // Perform simple memcpy for simple copying.
378       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
379     } else {
380       // For arrays with complex element types perform element by element
381       // copying.
382       EmitOMPAggregateAssign(
383           DestAddr, SrcAddr, OriginalType,
384           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
385             // Working with the single array element, so have to remap
386             // destination and source variables to corresponding array
387             // elements.
388             CodeGenFunction::OMPPrivateScope Remap(*this);
389             Remap.addPrivate(DestVD, [DestElement]() -> Address {
390               return DestElement;
391             });
392             Remap.addPrivate(
393                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
394             (void)Remap.Privatize();
395             EmitIgnoredExpr(Copy);
396           });
397     }
398   } else {
399     // Remap pseudo source variable to private copy.
400     CodeGenFunction::OMPPrivateScope Remap(*this);
401     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
402     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
403     (void)Remap.Privatize();
404     // Emit copying of the whole variable.
405     EmitIgnoredExpr(Copy);
406   }
407 }
408 
/// Emits initialization of private copies for the 'firstprivate' clause
/// variables of directive \p D into \p PrivateScope. Returns true when at
/// least one firstprivate variable is also lastprivate, so callers know a
/// final copy-back will be required.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  // Collect the canonical decls named in 'lastprivate' clauses so overlaps
  // with firstprivate variables can be detected below.
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  // Each variable is emitted at most once even if it appears in multiple
  // 'firstprivate' clauses.
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    // Walk private copies in lockstep with the variable refs and init exprs.
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate ||
          (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0);
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Build a reference to the original variable to obtain its address in
        // the current (possibly captured) region.
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = OrigVD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              // Non-trivial element construction: copy element by element,
              // temporarily binding VDInit to the current source element.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
492 
493 void CodeGenFunction::EmitOMPPrivateClause(
494     const OMPExecutableDirective &D,
495     CodeGenFunction::OMPPrivateScope &PrivateScope) {
496   if (!HaveInsertPoint())
497     return;
498   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
499   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
500     auto IRef = C->varlist_begin();
501     for (auto IInit : C->private_copies()) {
502       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
503       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
504         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
505         bool IsRegistered =
506             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
507               // Emit private VarDecl with copy init.
508               EmitDecl(*VD);
509               return GetAddrOfLocalVar(VD);
510             });
511         assert(IsRegistered && "private var already registered as private");
512         // Silence the warning about unused variable.
513         (void)IsRegistered;
514       }
515       ++IRef;
516     }
517   }
518 }
519 
/// Emits copying of 'copyin' clause threadprivate variables from the master
/// thread's copies, guarded so the master thread itself skips the copy.
/// Returns true if any copyin code was emitted (callers then emit a barrier).
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    // Walk assignment ops in lockstep with the refs and src/dest exprs.
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          // Drop the local binding so later lookups see the threadprivate copy.
          LocalDeclMap.erase(VD);
        } else {
          // Without TLS, the master copy is the static-local or global symbol.
          MasterAddr =
            Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                        : CGM.GetAddrOfGlobal(VD),
                    getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // Master is detected by the thread's copy aliasing the master copy.
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
585 
/// Emits initialization for 'lastprivate' clause variables of directive \p D
/// into \p PrivateScope: records the original variables' addresses for the
/// final copy-back and emits private copies (unless the variable is also
/// firstprivate, whose clause handles the init). Returns true if the
/// directive has at least one lastprivate clause.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Each variable is handled at most once across all 'lastprivate' clauses.
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    // Walk private copies in lockstep with variable refs and dest exprs.
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Bind the pseudo destination variable to the original's address so
        // the copy-back emitted later targets the original storage.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> Address {
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
632 
/// Emits the final copy-back of 'lastprivate' clause variables of directive
/// \p D into their original storage. When \p IsLastIterCond is non-null the
/// copies are guarded so they run only on the last iteration.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  // For loop directives, map each loop counter to its 'final' update
  // expression so lastprivate loop counters get their final value before
  // being copied back.
  llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl();
      LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  // Each variable is copied back at most once across all clauses.
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    // Walk assignment ops in lockstep with the refs and src/dest exprs.
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(UpExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // For reference-typed privates, copy from the referenced storage.
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
697 
/// Stores \p Addr (cast to the innermost pointee type) behind a chain of
/// temporaries matching each pointer/reference level of \p BaseTy, so a base
/// expression of that type can be rebound to \p Addr. Returns the outermost
/// temporary, or \p Addr directly when \p BaseTy has no such indirection.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // Build one temporary per pointer/reference level, linking each new
  // (inner) temporary into the previous (outer) one.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLV.getPointer()->getType();
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the address in the innermost temporary and hand back the
    // outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLV.getAlignment());
}
724 
/// Load through the pointer/reference indirections of \p BaseLV until the
/// element type \p ElTy is reached, then reinterpret the resulting pointer
/// as a pointer to \p ElTy.
///
/// Used by reduction codegen to obtain the address of the first element of
/// an array section/subscript whose base may be a (possibly multi-level)
/// pointer or reference.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Dereference one level; pointers and references use distinct load
    // helpers so alignment information is preserved correctly.
    if (auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    else {
      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
                                             BaseTy->castAs<ReferenceType>());
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Reinterpret the final pointer as a pointer to the element type.
  return CGF.MakeAddrLValue(
      Address(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
          BaseLV.getAlignment()),
      BaseLV.getType(), BaseLV.getAlignmentSource());
}
745 
/// Emit initial values for the 'reduction' clauses of directive \p D.
///
/// For every reduction item Sema created three helpers: the private copy and
/// the LHS/RHS placeholder variables used later by the reduction combiner.
/// This routine allocates and initializes the private copy and registers all
/// the mappings in \p PrivateScope. Array sections, array subscripts and
/// plain variables each need different address arithmetic to map the private
/// storage back onto the original base expression.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Parallel iteration over lhs/rhs/private helper lists, advanced in
    // lockstep with the varlist at the bottom of the loop.
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    auto IPriv = C->privates().begin();
    for (auto IRef : C->varlists()) {
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
      if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
        // Case 1: the reduction item is an array section 'base[lb:len]'.
        // Find the underlying variable by stripping nested sections and
        // subscripts off the base.
        auto *Base = OASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
          Base = TempOASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        // Addresses of the section's lower and upper bound elements.
        auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
        auto OASELValueUB =
            EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
        auto OriginalBaseLValue = EmitLValue(DE);
        // Address of the first element after dereferencing pointer bases.
        LValue BaseLValue =
            loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
                        OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
          return OASELValueLB.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
                     OASELValueUB, OriginalBaseLValue]() -> Address {
              // Emit VarDecl with copy init for arrays.
              // Get the address of the original variable captured in current
              // captured region.
              // Section length in elements is UB - LB + 1; bind it to the
              // VLA size expression of the private copy's type.
              auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
                                                 OASELValueLB.getPointer());
              Size = Builder.CreateNUWAdd(
                  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(Size));
              EmitVariablyModifiedType(PrivateVD->getType());
              auto Emission = EmitAutoVarAlloca(*PrivateVD);
              auto Addr = Emission.getAllocatedAddress();
              auto *Init = PrivateVD->getInit();
              EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
              EmitAutoVarCleanups(Emission);
              // Emit private VarDecl with reduction init.
              // Offset the private storage backwards so that indexing it via
              // the original base yields the section's elements.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   OASELValueLB.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(),
                                OASELValueLB.getType(), OriginalBaseLValue,
                                Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        // The RHS placeholder maps directly onto the private copy.
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
          return GetAddrOfLocalVar(PrivateVD);
        });
      } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
        // Case 2: the reduction item is a single array element 'base[idx]'.
        auto *Base = ASE->getBase()->IgnoreParenImpCasts();
        while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
          Base = TempASE->getBase()->IgnoreParenImpCasts();
        auto *DE = cast<DeclRefExpr>(Base);
        auto *OrigVD = cast<VarDecl>(DE->getDecl());
        auto ASELValue = EmitLValue(ASE);
        auto OriginalBaseLValue = EmitLValue(DE);
        LValue BaseLValue = loadToBegin(
            *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
        // Store the address of the original variable associated with the LHS
        // implicit variable.
        PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
          return ASELValue.getAddress();
        });
        // Emit reduction copy.
        bool IsRegistered = PrivateScope.addPrivate(
            OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
                     OriginalBaseLValue]() -> Address {
              // Emit private VarDecl with reduction init.
              EmitDecl(*PrivateVD);
              auto Addr = GetAddrOfLocalVar(PrivateVD);
              // Shift the private element so it overlays the original index.
              auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
                                                   ASELValue.getPointer());
              auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
              return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
                                OriginalBaseLValue, Ptr);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
        PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
          return Builder.CreateElementBitCast(
              GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
              "rhs.begin");
        });
      } else {
        // Case 3: the reduction item is a plain variable (array or scalar).
        auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        QualType Type = PrivateVD->getType();
        if (getContext().getAsArrayType(Type)) {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          IRef->getType(), VK_LValue, IRef->getExprLoc());
          Address OriginalAddr = EmitLValue(&DRE).getAddress();
          PrivateScope.addPrivate(LHSVD, [this, OriginalAddr,
                                          LHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                OriginalAddr, ConvertTypeForMem(LHSVD->getType()),
                "lhs.begin");
          });
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            if (Type->isVariablyModifiedType()) {
              // Bind the VLA size expression to the original array's size
              // before allocating the private copy.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  *this, cast<OpaqueValueExpr>(
                             getContext()
                                 .getAsVariableArrayType(PrivateVD->getType())
                                 ->getSizeExpr()),
                  RValue::get(
                      getTypeSize(OrigVD->getType().getNonReferenceType())));
              EmitVariablyModifiedType(Type);
            }
            auto Emission = EmitAutoVarAlloca(*PrivateVD);
            auto Addr = Emission.getAllocatedAddress();
            auto *Init = PrivateVD->getInit();
            // Element-wise initialization with the reduction initializer.
            EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
            return Builder.CreateElementBitCast(
                GetAddrOfLocalVar(PrivateVD),
                ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
          });
        } else {
          // Store the address of the original variable associated with the LHS
          // implicit variable.
          PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
            DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                            CapturedStmtInfo->lookup(OrigVD) != nullptr,
                            IRef->getType(), VK_LValue, IRef->getExprLoc());
            return EmitLValue(&DRE).getAddress();
          });
          // Emit reduction copy.
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
                // Emit private VarDecl with reduction init.
                EmitDecl(*PrivateVD);
                return GetAddrOfLocalVar(PrivateVD);
              });
          assert(IsRegistered && "private var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
          PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
            return GetAddrOfLocalVar(PrivateVD);
          });
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  }
}
924 
925 void CodeGenFunction::EmitOMPReductionClauseFinal(
926     const OMPExecutableDirective &D) {
927   if (!HaveInsertPoint())
928     return;
929   llvm::SmallVector<const Expr *, 8> Privates;
930   llvm::SmallVector<const Expr *, 8> LHSExprs;
931   llvm::SmallVector<const Expr *, 8> RHSExprs;
932   llvm::SmallVector<const Expr *, 8> ReductionOps;
933   bool HasAtLeastOneReduction = false;
934   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
935     HasAtLeastOneReduction = true;
936     Privates.append(C->privates().begin(), C->privates().end());
937     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
938     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
939     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
940   }
941   if (HasAtLeastOneReduction) {
942     // Emit nowait reduction if nowait clause is present or directive is a
943     // parallel directive (it always has implicit barrier).
944     CGM.getOpenMPRuntime().emitReduction(
945         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
946         D.getSingleClause<OMPNowaitClause>() ||
947             isOpenMPParallelDirective(D.getDirectiveKind()) ||
948             D.getDirectiveKind() == OMPD_simd,
949         D.getDirectiveKind() == OMPD_simd);
950   }
951 }
952 
953 static void emitPostUpdateForReductionClause(
954     CodeGenFunction &CGF, const OMPExecutableDirective &D,
955     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
956   if (!CGF.HaveInsertPoint())
957     return;
958   llvm::BasicBlock *DoneBB = nullptr;
959   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
960     if (auto *PostUpdate = C->getPostUpdateExpr()) {
961       if (!DoneBB) {
962         if (auto *Cond = CondGen(CGF)) {
963           // If the first post-update expression is found, emit conditional
964           // block if it was requested.
965           auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
966           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
967           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
968           CGF.EmitBlock(ThenBB);
969         }
970       }
971       CGF.EmitIgnoredExpr(PostUpdate);
972     }
973   }
974   if (DoneBB)
975     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
976 }
977 
978 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
979                                            const OMPExecutableDirective &S,
980                                            OpenMPDirectiveKind InnermostKind,
981                                            const RegionCodeGenTy &CodeGen) {
982   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
983   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
984   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
985   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
986       emitParallelOrTeamsOutlinedFunction(S,
987           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
988   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
989     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
990     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
991                                          /*IgnoreResultAssign*/ true);
992     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
993         CGF, NumThreads, NumThreadsClause->getLocStart());
994   }
995   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
996     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
997     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
998         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
999   }
1000   const Expr *IfCond = nullptr;
1001   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1002     if (C->getNameModifier() == OMPD_unknown ||
1003         C->getNameModifier() == OMPD_parallel) {
1004       IfCond = C->getCondition();
1005       break;
1006     }
1007   }
1008   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1009                                               CapturedVars, IfCond);
1010 }
1011 
1012 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1013   OMPLexicalScope Scope(*this, S);
1014   // Emit parallel region as a standalone region.
1015   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1016     OMPPrivateScope PrivateScope(CGF);
1017     bool Copyins = CGF.EmitOMPCopyinClause(S);
1018     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1019     if (Copyins) {
1020       // Emit implicit barrier to synchronize threads and avoid data races on
1021       // propagation master's thread values of threadprivate variables to local
1022       // instances of that variables of all other implicit threads.
1023       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1024           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1025           /*ForceSimpleCall=*/true);
1026     }
1027     CGF.EmitOMPPrivateClause(S, PrivateScope);
1028     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1029     (void)PrivateScope.Privatize();
1030     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1031     CGF.EmitOMPReductionClauseFinal(S);
1032   };
1033   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
1034   emitPostUpdateForReductionClause(
1035       *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1036 }
1037 
1038 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1039                                       JumpDest LoopExit) {
1040   RunCleanupsScope BodyScope(*this);
1041   // Update counters values on current iteration.
1042   for (auto I : D.updates()) {
1043     EmitIgnoredExpr(I);
1044   }
1045   // Update the linear variables.
1046   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1047     for (auto U : C->updates()) {
1048       EmitIgnoredExpr(U);
1049     }
1050   }
1051 
1052   // On a continue in the body, jump to the end.
1053   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1054   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1055   // Emit loop body.
1056   EmitStmt(D.getBody());
1057   // The end (updates/cleanups).
1058   EmitBlock(Continue.getBlock());
1059   BreakContinueStack.pop_back();
1060 }
1061 
/// Emit the innermost normalized OpenMP loop:
///   cond -> [cleanup ->] body -> inc -> cond ... -> exit
/// \p BodyGen emits the loop body; \p PostIncGen runs extra code directly
/// after the increment expression.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  // Register the loop with the loop stack so loop metadata (e.g. vectorize
  // hints) gets attached to the back-edge.
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the cleanup staging block.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1108 
/// Emit initializers for the privatized variables of 'linear' clauses on
/// loop directive \p D, plus the pre-calculated linear steps for steps that
/// are not compile-time constants.
void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return;
  // Emit inits for the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto Init : C->inits()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init refers to the original variable: rebuild the reference so
        // it resolves against the captured copy inside this region.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
}
1140 
/// Emit the final values of the 'linear' clause variables of \p D, copying
/// them back to the original variables. \p CondGen may supply a condition;
/// if it returns non-null on the first final expression, all copy-backs are
/// wrapped in a conditional block (e.g. "only on the last iteration").
static void emitLinearClauseFinal(
    CodeGenFunction &CGF, const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.linear.pu");
          DoneBB = CGF.createBasicBlock(".omp.linear.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      // Temporarily remap the variable to its original address so the final
      // expression stores into the original, not the private copy.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(CGF);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      CGF.EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      CGF.EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
1179 
1180 static void emitAlignedClause(CodeGenFunction &CGF,
1181                               const OMPExecutableDirective &D) {
1182   if (!CGF.HaveInsertPoint())
1183     return;
1184   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1185     unsigned ClauseAlignment = 0;
1186     if (auto AlignmentExpr = Clause->getAlignment()) {
1187       auto AlignmentCI =
1188           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1189       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1190     }
1191     for (auto E : Clause->varlists()) {
1192       unsigned Alignment = ClauseAlignment;
1193       if (Alignment == 0) {
1194         // OpenMP [2.8.1, Description]
1195         // If no optional parameter is specified, implementation-defined default
1196         // alignments for SIMD instructions on the target platforms are assumed.
1197         Alignment =
1198             CGF.getContext()
1199                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1200                     E->getType()->getPointeeType()))
1201                 .getQuantity();
1202       }
1203       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1204              "alignment is not power of 2");
1205       if (Alignment != 0) {
1206         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1207         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1208       }
1209     }
1210   }
1211 }
1212 
1213 static void emitPrivateLoopCounters(CodeGenFunction &CGF,
1214                                     CodeGenFunction::OMPPrivateScope &LoopScope,
1215                                     ArrayRef<Expr *> Counters,
1216                                     ArrayRef<Expr *> PrivateCounters) {
1217   if (!CGF.HaveInsertPoint())
1218     return;
1219   auto I = PrivateCounters.begin();
1220   for (auto *E : Counters) {
1221     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1222     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1223     Address Addr = Address::invalid();
1224     (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1225       // Emit var without initialization.
1226       auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
1227       CGF.EmitAutoVarCleanups(VarEmission);
1228       Addr = VarEmission.getAllocatedAddress();
1229       return Addr;
1230     });
1231     (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
1232     ++I;
1233   }
1234 }
1235 
1236 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1237                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1238                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1239   if (!CGF.HaveInsertPoint())
1240     return;
1241   {
1242     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1243     emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
1244                             S.private_counters());
1245     (void)PreCondScope.Privatize();
1246     // Get initial values of real counters.
1247     for (auto I : S.inits()) {
1248       CGF.EmitIgnoredExpr(I);
1249     }
1250   }
1251   // Check that loop is executed at least one time.
1252   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1253 }
1254 
1255 static void
1256 emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
1257                       CodeGenFunction::OMPPrivateScope &PrivateScope) {
1258   if (!CGF.HaveInsertPoint())
1259     return;
1260   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1261     auto CurPrivate = C->privates().begin();
1262     for (auto *E : C->varlists()) {
1263       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1264       auto *PrivateVD =
1265           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1266       bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1267         // Emit private VarDecl with copy init.
1268         CGF.EmitVarDecl(*PrivateVD);
1269         return CGF.GetAddrOfLocalVar(PrivateVD);
1270       });
1271       assert(IsRegistered && "linear var already registered as private");
1272       // Silence the warning about unused variable.
1273       (void)IsRegistered;
1274       ++CurPrivate;
1275     }
1276   }
1277 }
1278 
1279 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1280                                      const OMPExecutableDirective &D,
1281                                      bool IsMonotonic) {
1282   if (!CGF.HaveInsertPoint())
1283     return;
1284   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1285     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1286                                  /*ignoreResult=*/true);
1287     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1288     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1289     // In presence of finite 'safelen', it may be unsafe to mark all
1290     // the memory instructions parallel, because loop-carried
1291     // dependences of 'safelen' iterations are possible.
1292     if (!IsMonotonic)
1293       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1294   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1295     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1296                                  /*ignoreResult=*/true);
1297     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1298     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1299     // In presence of finite 'safelen', it may be unsafe to mark all
1300     // the memory instructions parallel, because loop-carried
1301     // dependences of 'safelen' iterations are possible.
1302     CGF.LoopStack.setParallel(false);
1303   }
1304 }
1305 
/// Set up loop metadata for a simd region: request vectorization, mark the
/// loop parallel unless the schedule is monotonic, then refine the settings
/// from any simdlen/safelen clauses.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}
1313 
/// Emit the final values of the loop counters of simd directive \p D so the
/// original variables hold their post-loop values. Only counters visible
/// outside the construct (locally declared or captured) are updated.
/// \p CondGen may wrap all updates in a conditional block; see
/// emitLinearClauseFinal for the same pattern.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    // Skip counters that are not referenced outside the construct.
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Temporarily remap the counter to the original storage while emitting
      // the final expression, so the update lands in the original variable.
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1349 
/// Emit code for '#pragma omp simd'. The whole construct is emitted inline
/// (no runtime library calls) as a precondition-guarded loop, followed by
/// final updates of linear and loop-counter variables.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      // Precondition is not a constant: emit a real conditional branch and
      // keep the continuation block so we can close the region at the end.
      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
      ContBlock = CGF.createBasicBlock("simd.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPSimdInit(S);

    emitAlignedClause(CGF, S);
    CGF.EmitOMPLinearClauseInit(S);
    // Set inside the scope below, always before its first read.
    bool HasLastprivateClause;
    {
      // Privatize loop counters, linear vars and clause variables for the
      // duration of the loop body.
      OMPPrivateScope LoopScope(CGF);
      emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
                              S.private_counters());
      emitPrivateLinearVars(CGF, S, LoopScope);
      CGF.EmitOMPPrivateClause(S, LoopScope);
      CGF.EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                           S.getInc(),
                           [&S](CodeGenFunction &CGF) {
                             CGF.EmitOMPLoopBody(S, JumpDest());
                             CGF.EmitStopPoint(&S);
                           },
                           [](CodeGenFunction &) {});
      // Emit final copy of the lastprivate variables at the end of loops.
      if (HasLastprivateClause) {
        CGF.EmitOMPLastprivateClauseFinal(S);
      }
      CGF.EmitOMPReductionClauseFinal(S);
      // The nullptr-returning callback means the post-update is emitted
      // unconditionally (there is no is-last-iteration flag for simd).
      emitPostUpdateForReductionClause(
          CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    }
    CGF.EmitOMPSimdFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    emitLinearClauseFinal(
        CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
1431 
/// \brief Emit the outer dispatch loop used when a worksharing/distribute
/// schedule hands out work in chunks: while another chunk [LB..UB] is
/// available, run the inner loop over it, then advance to the next chunk.
/// For static (non-dynamic, non-ordered) schedules the next chunk is obtained
/// by striding LB/UB; otherwise it is requested from the runtime.
void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Bit width and signedness of the iteration variable select the proper
  // runtime entry points (e.g. 4 vs. 8 byte, signed vs. unsigned).
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond());
  } else {
    // Ask the runtime for the next chunk; it returns false when no chunks
    // remain.
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                    IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the cleanup-staging block.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);

  SourceLocation Loc = S.getLocStart();
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                   [&S, LoopExit](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S, LoopExit);
                     CGF.EmitStopPoint(&S);
                   },
                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
                     if (Ordered) {
                       // Notify the runtime that this iteration's ordered
                       // region is done so the next one may proceed.
                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
                           CGF, Loc, IVSize, IVSigned);
                     }
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  if (!DynamicOrOrdered)
    RT.emitForStaticFinish(*this, S.getLocEnd());

}
1522 
/// \brief Initialize the runtime schedule for a worksharing loop that needs an
/// outer dispatch loop (any schedule except static non-chunked), then emit the
/// outer loop itself via EmitOMPOuterLoop.
void CodeGenFunction::EmitOMPForOuterLoop(
    OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  // Note: an 'ordered' clause forces the dynamic dispatch entry points even
  // for static schedules.
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  // Bit width and signedness of the iteration variable select the runtime
  // entry points.
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
                           IVSize, IVSigned, Ordered, UBVal, Chunk);
  } else {
    RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                         Ordered, IL, LB, UB, ST, Chunk);
  }

  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
                   ST, IL, Chunk);
}
1602 
1603 void CodeGenFunction::EmitOMPDistributeOuterLoop(
1604     OpenMPDistScheduleClauseKind ScheduleKind,
1605     const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
1606     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1607 
1608   auto &RT = CGM.getOpenMPRuntime();
1609 
1610   // Emit outer loop.
1611   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
1612   // dynamic
1613   //
1614 
1615   const Expr *IVExpr = S.getIterationVariable();
1616   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1617   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1618 
1619   RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
1620                               IVSize, IVSigned, /* Ordered = */ false,
1621                               IL, LB, UB, ST, Chunk);
1622 
1623   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
1624                    S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
1625 }
1626 
1627 /// \brief Emit a helper variable and return corresponding lvalue.
1628 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1629                                const DeclRefExpr *Helper) {
1630   auto VDecl = cast<VarDecl>(Helper->getDecl());
1631   CGF.EmitVarDecl(*VDecl);
1632   return CGF.EmitLValue(Helper);
1633 }
1634 
namespace {
  /// \brief Bundles a schedule clause kind together with its two optional
  /// modifiers so they can be passed around as a single value.
  /// NOTE(review): not referenced anywhere in this part of the file —
  /// presumably used further below; verify before removing.
  struct ScheduleKindModifiersTy {
    OpenMPScheduleClauseKind Kind;
    OpenMPScheduleClauseModifier M1;
    OpenMPScheduleClauseModifier M2;
    ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                            OpenMPScheduleClauseModifier M1,
                            OpenMPScheduleClauseModifier M2)
        : Kind(Kind), M1(M1), M2(M2) {}
  };
} // namespace
1646 
1647 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
1648   // Emit the loop iteration variable.
1649   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
1650   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
1651   EmitVarDecl(*IVDecl);
1652 
1653   // Emit the iterations count variable.
1654   // If it is not a variable, Sema decided to calculate iterations count on each
1655   // iteration (e.g., it is foldable into a constant).
1656   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1657     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1658     // Emit calculation of the iterations count.
1659     EmitIgnoredExpr(S.getCalcLastIteration());
1660   }
1661 
1662   auto &RT = CGM.getOpenMPRuntime();
1663 
1664   bool HasLastprivateClause;
1665   // Check pre-condition.
1666   {
1667     // Skip the entire loop if we don't meet the precondition.
1668     // If the condition constant folds and can be elided, avoid emitting the
1669     // whole loop.
1670     bool CondConstant;
1671     llvm::BasicBlock *ContBlock = nullptr;
1672     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1673       if (!CondConstant)
1674         return false;
1675     } else {
1676       auto *ThenBlock = createBasicBlock("omp.precond.then");
1677       ContBlock = createBasicBlock("omp.precond.end");
1678       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
1679                   getProfileCount(&S));
1680       EmitBlock(ThenBlock);
1681       incrementProfileCounter(&S);
1682     }
1683 
1684     emitAlignedClause(*this, S);
1685     EmitOMPLinearClauseInit(S);
1686     // Emit helper vars inits.
1687     LValue LB =
1688         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
1689     LValue UB =
1690         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
1691     LValue ST =
1692         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
1693     LValue IL =
1694         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
1695 
1696     // Emit 'then' code.
1697     {
1698       OMPPrivateScope LoopScope(*this);
1699       if (EmitOMPFirstprivateClause(S, LoopScope)) {
1700         // Emit implicit barrier to synchronize threads and avoid data races on
1701         // initialization of firstprivate variables and post-update of
1702         // lastprivate variables.
1703         CGM.getOpenMPRuntime().emitBarrierCall(
1704             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1705             /*ForceSimpleCall=*/true);
1706       }
1707       EmitOMPPrivateClause(S, LoopScope);
1708       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
1709       EmitOMPReductionClauseInit(S, LoopScope);
1710       emitPrivateLoopCounters(*this, LoopScope, S.counters(),
1711                               S.private_counters());
1712       emitPrivateLinearVars(*this, S, LoopScope);
1713       (void)LoopScope.Privatize();
1714 
1715       // Detect the loop schedule kind and chunk.
1716       llvm::Value *Chunk = nullptr;
1717       OpenMPScheduleClauseKind ScheduleKind = OMPC_SCHEDULE_unknown;
1718       OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown;
1719       OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;
1720       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
1721         ScheduleKind = C->getScheduleKind();
1722         M1 = C->getFirstScheduleModifier();
1723         M2 = C->getSecondScheduleModifier();
1724         if (const auto *Ch = C->getChunkSize()) {
1725           Chunk = EmitScalarExpr(Ch);
1726           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
1727                                        S.getIterationVariable()->getType(),
1728                                        S.getLocStart());
1729         }
1730       }
1731       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1732       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1733       const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
1734       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
1735       // If the static schedule kind is specified or if the ordered clause is
1736       // specified, and if no monotonic modifier is specified, the effect will
1737       // be as if the monotonic modifier was specified.
1738       if (RT.isStaticNonchunked(ScheduleKind,
1739                                 /* Chunked */ Chunk != nullptr) &&
1740           !Ordered) {
1741         if (isOpenMPSimdDirective(S.getDirectiveKind()))
1742           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
1743         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1744         // When no chunk_size is specified, the iteration space is divided into
1745         // chunks that are approximately equal in size, and at most one chunk is
1746         // distributed to each thread. Note that the size of the chunks is
1747         // unspecified in this case.
1748         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
1749                              IVSize, IVSigned, Ordered,
1750                              IL.getAddress(), LB.getAddress(),
1751                              UB.getAddress(), ST.getAddress());
1752         auto LoopExit =
1753             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
1754         // UB = min(UB, GlobalUB);
1755         EmitIgnoredExpr(S.getEnsureUpperBound());
1756         // IV = LB;
1757         EmitIgnoredExpr(S.getInit());
1758         // while (idx <= UB) { BODY; ++idx; }
1759         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1760                          S.getInc(),
1761                          [&S, LoopExit](CodeGenFunction &CGF) {
1762                            CGF.EmitOMPLoopBody(S, LoopExit);
1763                            CGF.EmitStopPoint(&S);
1764                          },
1765                          [](CodeGenFunction &) {});
1766         EmitBlock(LoopExit.getBlock());
1767         // Tell the runtime we are done.
1768         RT.emitForStaticFinish(*this, S.getLocStart());
1769       } else {
1770         const bool IsMonotonic = Ordered ||
1771                                  ScheduleKind == OMPC_SCHEDULE_static ||
1772                                  ScheduleKind == OMPC_SCHEDULE_unknown ||
1773                                  M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
1774                                  M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
1775         // Emit the outer loop, which requests its work chunk [LB..UB] from
1776         // runtime and runs the inner loop to process it.
1777         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
1778                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
1779                             IL.getAddress(), Chunk);
1780       }
1781       EmitOMPReductionClauseFinal(S);
1782       // Emit post-update of the reduction variables if IsLastIter != 0.
1783       emitPostUpdateForReductionClause(
1784           *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
1785             return CGF.Builder.CreateIsNotNull(
1786                 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
1787           });
1788       // Emit final copy of the lastprivate variables if IsLastIter != 0.
1789       if (HasLastprivateClause)
1790         EmitOMPLastprivateClauseFinal(
1791             S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
1792     }
1793     if (isOpenMPSimdDirective(S.getDirectiveKind())) {
1794       EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
1795         return CGF.Builder.CreateIsNotNull(
1796             CGF.EmitLoadOfScalar(IL, S.getLocStart()));
1797       });
1798     }
1799     emitLinearClauseFinal(*this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
1800       return CGF.Builder.CreateIsNotNull(
1801           CGF.EmitLoadOfScalar(IL, S.getLocStart()));
1802     });
1803     // We're now done with the loop, so jump to the continuation block.
1804     if (ContBlock) {
1805       EmitBranch(ContBlock);
1806       EmitBlock(ContBlock, true);
1807     }
1808   }
1809   return HasLastprivateClause;
1810 }
1811 
1812 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
1813   bool HasLastprivates = false;
1814   {
1815     OMPLexicalScope Scope(*this, S);
1816     auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
1817       HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
1818     };
1819     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
1820                                                 S.hasCancel());
1821   }
1822 
1823   // Emit an implicit barrier at the end.
1824   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
1825     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
1826   }
1827 }
1828 
1829 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
1830   bool HasLastprivates = false;
1831   {
1832     OMPLexicalScope Scope(*this, S);
1833     auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
1834       HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
1835     };
1836     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1837   }
1838 
1839   // Emit an implicit barrier at the end.
1840   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
1841     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
1842   }
1843 }
1844 
1845 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
1846                                 const Twine &Name,
1847                                 llvm::Value *Init = nullptr) {
1848   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
1849   if (Init)
1850     CGF.EmitScalarInit(Init, LVal);
1851   return LVal;
1852 }
1853 
/// Emit the body shared by '(parallel) sections' directives: the sections are
/// lowered as a statically-scheduled worksharing loop over section indices,
/// with a switch inside the loop body dispatching to the individual sections.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  // CS is null when the construct contains a single statement rather than a
  // compound of '#pragma omp section' sub-statements.
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound = number of sections - 1.
    // NOTE(review): CS->size() - 1 would wrap for an empty compound statement;
    // presumably Sema guarantees at least one section here — verify.
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Build stack-allocated AST expressions (IV <= UB, ++IV) over opaque
    // references to the helper lvalues so the generic inner-loop emitter can
    // be reused.
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(),
                        /*fpContractable=*/false);
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single-statement form: emit it as the only case.
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();

    // Emit static non-chunked loop.
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
        /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
        UB.getAddress(), ST.getAddress());
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, CGF.Builder.CreateIsNotNull(
                 CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };

  // Only 'sections'/'parallel sections' directives can carry a 'cancel'
  // region; check both concrete types.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
1984 
1985 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
1986   {
1987     OMPLexicalScope Scope(*this, S);
1988     EmitSections(S);
1989   }
1990   // Emit an implicit barrier at the end.
1991   if (!S.getSingleClause<OMPNowaitClause>()) {
1992     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
1993                                            OMPD_sections);
1994   }
1995 }
1996 
1997 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
1998   OMPLexicalScope Scope(*this, S);
1999   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2000     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2001   };
2002   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2003                                               S.hasCancel());
2004 }
2005 
2006 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2007   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2008   llvm::SmallVector<const Expr *, 8> DestExprs;
2009   llvm::SmallVector<const Expr *, 8> SrcExprs;
2010   llvm::SmallVector<const Expr *, 8> AssignmentOps;
2011   // Check if there are any 'copyprivate' clauses associated with this
2012   // 'single' construct.
2013   // Build a list of copyprivate variables along with helper expressions
2014   // (<source>, <destination>, <destination>=<source> expressions)
2015   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2016     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2017     DestExprs.append(C->destination_exprs().begin(),
2018                      C->destination_exprs().end());
2019     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2020     AssignmentOps.append(C->assignment_ops().begin(),
2021                          C->assignment_ops().end());
2022   }
2023   {
2024     OMPLexicalScope Scope(*this, S);
2025     // Emit code for 'single' region along with 'copyprivate' clauses
2026     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2027       CodeGenFunction::OMPPrivateScope SingleScope(CGF);
2028       (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2029       CGF.EmitOMPPrivateClause(S, SingleScope);
2030       (void)SingleScope.Privatize();
2031       CGF.EmitStmt(
2032           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2033     };
2034     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2035                                             CopyprivateVars, DestExprs,
2036                                             SrcExprs, AssignmentOps);
2037   }
2038   // Emit an implicit barrier at the end (to avoid data race on firstprivate
2039   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2040   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2041     CGM.getOpenMPRuntime().emitBarrierCall(
2042         *this, S.getLocStart(),
2043         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2044   }
2045 }
2046 
2047 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2048   OMPLexicalScope Scope(*this, S);
2049   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2050     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2051   };
2052   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2053 }
2054 
2055 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2056   OMPLexicalScope Scope(*this, S);
2057   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2058     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2059   };
2060   Expr *Hint = nullptr;
2061   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2062     Hint = HintClause->getHint();
2063   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2064                                             S.getDirectiveName().getAsString(),
2065                                             CodeGen, S.getLocStart(), Hint);
2066 }
2067 
2068 void CodeGenFunction::EmitOMPParallelForDirective(
2069     const OMPParallelForDirective &S) {
2070   // Emit directive as a combined directive that consists of two implicit
2071   // directives: 'parallel' with 'for' directive.
2072   OMPLexicalScope Scope(*this, S);
2073   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2074     CGF.EmitOMPWorksharingLoop(S);
2075   };
2076   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
2077 }
2078 
2079 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2080     const OMPParallelForSimdDirective &S) {
2081   // Emit directive as a combined directive that consists of two implicit
2082   // directives: 'parallel' with 'for' directive.
2083   OMPLexicalScope Scope(*this, S);
2084   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2085     CGF.EmitOMPWorksharingLoop(S);
2086   };
2087   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
2088 }
2089 
2090 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2091     const OMPParallelSectionsDirective &S) {
2092   // Emit directive as a combined directive that consists of two implicit
2093   // directives: 'parallel' with 'sections' directive.
2094   OMPLexicalScope Scope(*this, S);
2095   auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitSections(S); };
2096   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
2097 }
2098 
// Emits codegen for '#pragma omp task': collects private/firstprivate copies
// and dependences from the clauses, outlines the task body into a function,
// and emits the runtime task-creation call.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  OMPLexicalScope Scope(*this, S);
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  // PrivateVars holds the original variable references, PrivateCopies the
  // matching private-copy declarations; the two lists stay index-aligned.
  llvm::SmallVector<const Expr *, 8> PrivateVars;
  llvm::SmallVector<const Expr *, 8> PrivateCopies;
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Deduplicate: the same variable may appear in multiple clauses; only
      // the first occurrence is recorded.
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        PrivateVars.push_back(*IRef);
        PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  // Same index-aligned layout as above, plus the initializer expressions.
  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        FirstprivateVars.push_back(*IRef);
        FirstprivateCopies.push_back(IInit);
        FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Build list of dependences.
  // Each entry pairs the dependency kind (in/out/inout/...) with the
  // dependent expression from a 'depend' clause.
  llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
      Dependences;
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    for (auto *IRef : C->varlists()) {
      Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
    }
  }
  // Body generator for the outlined task function.
  auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
      CodeGenFunction &CGF) {
    // Set proper addresses for generated private copies.
    auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
    OMPPrivateScope Scope(CGF);
    if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
      // Load the copy function (param 3) and the privates pointer (param 2)
      // from the captured decl's parameters; the copy function fills in the
      // addresses of the task's private copies.
      // NOTE(review): the exact parameter layout is defined by the task
      // outlined-function ABI elsewhere — confirm indices against it.
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16>
          PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      // For each private/firstprivate variable, allocate a pointer-sized temp
      // that the copy function will fill with the private copy's address.
      for (auto *E : PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.EmitRuntimeCall(CopyFn, CallArgs);
      // Register each filled-in address as the private replacement for the
      // original variable within this scope.
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    (void)Scope.Privatize();
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, OMPD_task, CodeGen);
  // Check if we should emit tied or untied task.
  bool Tied = !S.getSingleClause<OMPUntiedClause>();
  // Check if the task is final
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Pick the first applicable 'if' clause condition (unmodified or with the
  // 'task' name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitTaskCall(
      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
      CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
      FirstprivateCopies, FirstprivateInits, Dependences);
}
2225 
2226 void CodeGenFunction::EmitOMPTaskyieldDirective(
2227     const OMPTaskyieldDirective &S) {
2228   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2229 }
2230 
2231 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2232   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2233 }
2234 
2235 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2236   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2237 }
2238 
2239 void CodeGenFunction::EmitOMPTaskgroupDirective(
2240     const OMPTaskgroupDirective &S) {
2241   OMPLexicalScope Scope(*this, S);
2242   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2243     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2244   };
2245   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2246 }
2247 
2248 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2249   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2250     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2251       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2252                                 FlushClause->varlist_end());
2253     }
2254     return llvm::None;
2255   }(), S.getLocStart());
2256 }
2257 
// Emits the loop body of a '#pragma omp distribute' construct: sets up the
// iteration variable and bounds helpers, privatizes loop counters, and emits
// either a single statically-scheduled chunk or a dynamic outer loop,
// depending on the dist_schedule clause.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      // Precondition is not a constant: branch on it at runtime.
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    // Emit 'then' code.
    {
      // Emit helper vars inits.
      // LB/UB are the chunk bounds, ST the stride, IL the "is last iteration"
      // flag; all are passed by address to the runtime init call below.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      emitPrivateLoopCounters(*this, LoopScope, S.counters(),
                              S.private_counters());
      (void)LoopScope.Privatize();

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          // Convert the chunk size to the iteration variable's type.
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
          S.getIterationVariable()->getType(),
          S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // Static, non-chunked: each team gets exactly one chunk, so a single
        // init/inner-loop/finish sequence suffices.
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                             IVSize, IVSigned, /* Ordered = */ false,
                             IL.getAddress(), LB.getAddress(),
                             UB.getAddress(), ST.getAddress());
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
                            IL.getAddress(), Chunk);
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
2373 
2374 void CodeGenFunction::EmitOMPDistributeDirective(
2375     const OMPDistributeDirective &S) {
2376   LexicalScope Scope(*this, S.getSourceRange());
2377   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2378     CGF.EmitOMPDistributeLoop(S);
2379   };
2380   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2381                                               false);
2382 }
2383 
2384 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
2385                                                    const CapturedStmt *S) {
2386   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
2387   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
2388   CGF.CapturedStmtInfo = &CapStmtInfo;
2389   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
2390   Fn->addFnAttr(llvm::Attribute::NoInline);
2391   return Fn;
2392 }
2393 
2394 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
2395   if (!S.getAssociatedStmt())
2396     return;
2397   OMPLexicalScope Scope(*this, S);
2398   auto *C = S.getSingleClause<OMPSIMDClause>();
2399   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
2400     if (C) {
2401       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2402       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2403       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
2404       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
2405       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
2406     } else {
2407       CGF.EmitStmt(
2408           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2409     }
2410   };
2411   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
2412 }
2413 
2414 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
2415                                          QualType SrcType, QualType DestType,
2416                                          SourceLocation Loc) {
2417   assert(CGF.hasScalarEvaluationKind(DestType) &&
2418          "DestType must have scalar evaluation kind.");
2419   assert(!Val.isAggregate() && "Must be a scalar or complex.");
2420   return Val.isScalar()
2421              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
2422                                         Loc)
2423              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
2424                                                  DestType, Loc);
2425 }
2426 
2427 static CodeGenFunction::ComplexPairTy
2428 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
2429                       QualType DestType, SourceLocation Loc) {
2430   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
2431          "DestType must have complex evaluation kind.");
2432   CodeGenFunction::ComplexPairTy ComplexVal;
2433   if (Val.isScalar()) {
2434     // Convert the input element to the element type of the complex.
2435     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2436     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
2437                                               DestElementType, Loc);
2438     ComplexVal = CodeGenFunction::ComplexPairTy(
2439         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
2440   } else {
2441     assert(Val.isComplex() && "Must be a scalar or complex.");
2442     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
2443     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2444     ComplexVal.first = CGF.EmitScalarConversion(
2445         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
2446     ComplexVal.second = CGF.EmitScalarConversion(
2447         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
2448   }
2449   return ComplexVal;
2450 }
2451 
2452 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
2453                                   LValue LVal, RValue RVal) {
2454   if (LVal.isGlobalReg()) {
2455     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
2456   } else {
2457     CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
2458                                              : llvm::Monotonic,
2459                         LVal.isVolatile(), /*IsInit=*/false);
2460   }
2461 }
2462 
2463 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
2464                                          QualType RValTy, SourceLocation Loc) {
2465   switch (getEvaluationKind(LVal.getType())) {
2466   case TEK_Scalar:
2467     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
2468                                *this, RVal, RValTy, LVal.getType(), Loc)),
2469                            LVal);
2470     break;
2471   case TEK_Complex:
2472     EmitStoreOfComplex(
2473         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
2474         /*isInit=*/false);
2475     break;
2476   case TEK_Aggregate:
2477     llvm_unreachable("Must be a scalar or complex.");
2478   }
2479 }
2480 
2481 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
2482                                   const Expr *X, const Expr *V,
2483                                   SourceLocation Loc) {
2484   // v = x;
2485   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
2486   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
2487   LValue XLValue = CGF.EmitLValue(X);
2488   LValue VLValue = CGF.EmitLValue(V);
2489   RValue Res = XLValue.isGlobalReg()
2490                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
2491                    : CGF.EmitAtomicLoad(XLValue, Loc,
2492                                         IsSeqCst ? llvm::SequentiallyConsistent
2493                                                  : llvm::Monotonic,
2494                                         XLValue.isVolatile());
2495   // OpenMP, 2.12.6, atomic Construct
2496   // Any atomic construct with a seq_cst clause forces the atomically
2497   // performed operation to include an implicit flush operation without a
2498   // list.
2499   if (IsSeqCst)
2500     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2501   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
2502 }
2503 
2504 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
2505                                    const Expr *X, const Expr *E,
2506                                    SourceLocation Loc) {
2507   // x = expr;
2508   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
2509   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
2510   // OpenMP, 2.12.6, atomic Construct
2511   // Any atomic construct with a seq_cst clause forces the atomically
2512   // performed operation to include an implicit flush operation without a
2513   // list.
2514   if (IsSeqCst)
2515     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2516 }
2517 
// Tries to lower an atomic update of 'X' by 'Update' with operator 'BO' to a
// single LLVM 'atomicrmw' instruction. Returns {true, old value} on success,
// {false, null} when the operation cannot be expressed as atomicrmw and the
// caller must fall back to a compare-and-swap loop.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Map the AST binary operator to an atomicrmw binop where one exists.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // 'expr - x' is not 'x - expr'; only the x-on-the-left form maps to Sub.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    // 'x = x < e ? x : e' is min; 'x = e < x ? e : x' is also min, but with
    // operands swapped it becomes max — pick by signedness and operand order.
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // These have no atomicrmw equivalent; signal fallback to the caller.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // These operator kinds are rewritten away by Sema before codegen.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  // Widen/narrow constant updates to the element type of 'x'.
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
2609 
2610 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
2611     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
2612     llvm::AtomicOrdering AO, SourceLocation Loc,
2613     const llvm::function_ref<RValue(RValue)> &CommonGen) {
2614   // Update expressions are allowed to have the following forms:
2615   // x binop= expr; -> xrval + expr;
2616   // x++, ++x -> xrval + 1;
2617   // x--, --x -> xrval - 1;
2618   // x = x binop expr; -> xrval binop expr
2619   // x = expr Op x; - > expr binop xrval;
2620   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
2621   if (!Res.first) {
2622     if (X.isGlobalReg()) {
2623       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
2624       // 'xrval'.
2625       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
2626     } else {
2627       // Perform compare-and-swap procedure.
2628       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
2629     }
2630   }
2631   return Res;
2632 }
2633 
// Emits '#pragma omp atomic update': atomically applies the binary update
// expression 'UE' to 'x', with 'expr' given by 'E'. IsXLHSInRHSPart tells
// whether 'x' is the left operand inside UE's RHS.
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  // Sema wraps both operands of UE in OpaqueValueExprs; identify which one
  // stands for 'x' and which for 'expr' based on operand order.
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Generator used on the fallback path: binds the opaque operands to the
  // already-evaluated 'expr' and the current value of 'x', then evaluates UE.
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
2670 
2671 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
2672                             QualType SourceType, QualType ResType,
2673                             SourceLocation Loc) {
2674   switch (CGF.getEvaluationKind(ResType)) {
2675   case TEK_Scalar:
2676     return RValue::get(
2677         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
2678   case TEK_Complex: {
2679     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
2680     return RValue::getComplex(Res.first, Res.second);
2681   }
2682   case TEK_Aggregate:
2683     break;
2684   }
2685   llvm_unreachable("Must be a scalar or complex.");
2686 }
2687 
// Emits '#pragma omp atomic capture': atomically updates (or rewrites) 'x'
// and captures its old or new value into 'v'. When 'UE' is non-null the
// construct carries an update expression; otherwise 'x' is simply overwritten
// with 'E'. IsPostfixUpdate selects capturing the old (true) vs new value.
static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    // Identify which opaque operand of UE stands for 'x' and which for 'expr'.
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Fallback generator: evaluates UE with operands bound, and records the
    // value to be captured into 'v' (old 'x' for postfix, result otherwise).
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // Fallback generator for plain exchange: capture old 'x', store 'expr'.
    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
2767 
/// Dispatch codegen for an 'omp atomic' directive based on the clause that
/// selects its form (read, write, update, or capture).
///
/// \param Kind The clause kind that selects the atomic form; OMPC_unknown
/// means no form clause was given and defaults to 'update'.
/// \param IsSeqCst True if a 'seq_cst' clause was present.
/// \param IsPostfixUpdate True if 'v' must capture the old value of 'x'
/// (capture form only).
/// \param X The atomically accessed lvalue expression.
/// \param V The capture target lvalue (capture/read forms).
/// \param E The expression combined with 'x' (write/update/capture forms).
/// \param UE The helper update expression built by Sema, or null.
/// \param IsXLHSInRHSPart True if 'x' appears as the LHS operand inside UE.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  // An 'atomic' directive with no form clause behaves as 'atomic update'.
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  // All remaining clause kinds are rejected by Sema on 'omp atomic'; list
  // them explicitly (rather than using 'default') so that adding a new
  // OpenMP clause triggers a -Wswitch warning here.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
2830 
2831 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
2832   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
2833   OpenMPClauseKind Kind = OMPC_unknown;
2834   for (auto *C : S.clauses()) {
2835     // Find first clause (skip seq_cst clause, if it is first).
2836     if (C->getClauseKind() != OMPC_seq_cst) {
2837       Kind = C->getClauseKind();
2838       break;
2839     }
2840   }
2841 
2842   const auto *CS =
2843       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
2844   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
2845     enterFullExpression(EWC);
2846   }
2847   // Processing for statements under 'atomic capture'.
2848   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
2849     for (const auto *C : Compound->body()) {
2850       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
2851         enterFullExpression(EWC);
2852       }
2853     }
2854   }
2855 
2856   OMPLexicalScope Scope(*this, S);
2857   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) {
2858     CGF.EmitStopPoint(CS);
2859     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
2860                       S.getV(), S.getExpr(), S.getUpdateExpr(),
2861                       S.isXLHSInRHSPart(), S.getLocStart());
2862   };
2863   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
2864 }
2865 
2866 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
2867   OMPLexicalScope Scope(*this, S);
2868   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
2869 
2870   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2871   GenerateOpenMPCapturedVars(CS, CapturedVars);
2872 
2873   llvm::Function *Fn = nullptr;
2874   llvm::Constant *FnID = nullptr;
2875 
2876   // Check if we have any if clause associated with the directive.
2877   const Expr *IfCond = nullptr;
2878 
2879   if (auto *C = S.getSingleClause<OMPIfClause>()) {
2880     IfCond = C->getCondition();
2881   }
2882 
2883   // Check if we have any device clause associated with the directive.
2884   const Expr *Device = nullptr;
2885   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
2886     Device = C->getDevice();
2887   }
2888 
2889   // Check if we have an if clause whose conditional always evaluates to false
2890   // or if we do not have any targets specified. If so the target region is not
2891   // an offload entry point.
2892   bool IsOffloadEntry = true;
2893   if (IfCond) {
2894     bool Val;
2895     if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
2896       IsOffloadEntry = false;
2897   }
2898   if (CGM.getLangOpts().OMPTargetTriples.empty())
2899     IsOffloadEntry = false;
2900 
2901   assert(CurFuncDecl && "No parent declaration for target region!");
2902   StringRef ParentName;
2903   // In case we have Ctors/Dtors we use the complete type variant to produce
2904   // the mangling of the device outlined kernel.
2905   if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
2906     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
2907   else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
2908     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
2909   else
2910     ParentName =
2911         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
2912 
2913   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
2914                                                     IsOffloadEntry);
2915 
2916   CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
2917                                         CapturedVars);
2918 }
2919 
2920 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
2921                                         const OMPExecutableDirective &S,
2922                                         OpenMPDirectiveKind InnermostKind,
2923                                         const RegionCodeGenTy &CodeGen) {
2924   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2925   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2926   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
2927   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
2928       emitParallelOrTeamsOutlinedFunction(S,
2929           *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
2930 
2931   const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
2932   const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
2933   const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
2934   if (NT || TL) {
2935     llvm::Value *NumTeamsVal = (NT) ? CGF.Builder.CreateIntCast(
2936         CGF.EmitScalarExpr(NT->getNumTeams()), CGF.CGM.Int32Ty,
2937         /* isSigned = */ true) :
2938         CGF.Builder.getInt32(0);
2939 
2940     llvm::Value *ThreadLimitVal = (TL) ? CGF.Builder.CreateIntCast(
2941         CGF.EmitScalarExpr(TL->getThreadLimit()), CGF.CGM.Int32Ty,
2942         /* isSigned = */ true) :
2943         CGF.Builder.getInt32(0);
2944 
2945     CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeamsVal,
2946         ThreadLimitVal, S.getLocStart());
2947   }
2948 
2949   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
2950                                            CapturedVars);
2951 }
2952 
2953 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
2954   LexicalScope Scope(*this, S.getSourceRange());
2955   // Emit parallel region as a standalone region.
2956   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2957     OMPPrivateScope PrivateScope(CGF);
2958     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
2959     CGF.EmitOMPPrivateClause(S, PrivateScope);
2960     (void)PrivateScope.Privatize();
2961     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2962   };
2963   emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
2964 }
2965 
2966 void CodeGenFunction::EmitOMPCancellationPointDirective(
2967     const OMPCancellationPointDirective &S) {
2968   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
2969                                                    S.getCancelRegion());
2970 }
2971 
2972 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
2973   const Expr *IfCond = nullptr;
2974   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2975     if (C->getNameModifier() == OMPD_unknown ||
2976         C->getNameModifier() == OMPD_cancel) {
2977       IfCond = C->getCondition();
2978       break;
2979     }
2980   }
2981   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
2982                                         S.getCancelRegion());
2983 }
2984 
2985 CodeGenFunction::JumpDest
2986 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
2987   if (Kind == OMPD_parallel || Kind == OMPD_task)
2988     return ReturnBlock;
2989   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
2990          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
2991   return BreakContinueStack.back().BreakBlock;
2992 }
2993 
2994 // Generate the instructions for '#pragma omp target data' directive.
2995 void CodeGenFunction::EmitOMPTargetDataDirective(
2996     const OMPTargetDataDirective &S) {
2997   // emit the code inside the construct for now
2998   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2999   CGM.getOpenMPRuntime().emitInlinedDirective(
3000       *this, OMPD_target_data,
3001       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
3002 }
3003 
// Codegen for '#pragma omp target enter data' is not implemented yet; the
// directive is currently a no-op.
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // TODO: codegen for target enter data.
}
3008 
// Codegen for '#pragma omp target exit data' is not implemented yet; the
// directive is currently a no-op.
void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // TODO: codegen for target exit data.
}
3013 
// Codegen for '#pragma omp target parallel' is not implemented yet; the
// directive is currently a no-op.
void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  // TODO: codegen for target parallel.
}
3018 
// Codegen for '#pragma omp target parallel for' is not implemented yet; the
// directive is currently a no-op.
void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  // TODO: codegen for target parallel for.
}
3023 
3024 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
3025   // emit the code inside the construct for now
3026   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3027   CGM.getOpenMPRuntime().emitInlinedDirective(
3028       *this, OMPD_taskloop,
3029       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
3030 }
3031 
3032 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
3033     const OMPTaskLoopSimdDirective &S) {
3034   // emit the code inside the construct for now
3035   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3036   CGM.getOpenMPRuntime().emitInlinedDirective(
3037       *this, OMPD_taskloop_simd,
3038       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
3039 }
3040 
3041