1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/Stmt.h"
19 #include "clang/AST/StmtOpenMP.h"
20 using namespace clang;
21 using namespace CodeGen;
22 
23 void CodeGenFunction::GenerateOpenMPCapturedVars(
24     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
25   const RecordDecl *RD = S.getCapturedRecordDecl();
26   auto CurField = RD->field_begin();
27   auto CurCap = S.captures().begin();
28   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
29                                                  E = S.capture_init_end();
30        I != E; ++I, ++CurField, ++CurCap) {
31     if (CurField->hasCapturedVLAType()) {
32       auto VAT = CurField->getCapturedVLAType();
33       auto *Val = VLASizeMap[VAT->getSizeExpr()];
34       CapturedVars.push_back(Val);
35     } else if (CurCap->capturesThis())
36       CapturedVars.push_back(CXXThisValue);
37     else if (CurCap->capturesVariableByCopy())
38       CapturedVars.push_back(
39           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
40     else {
41       assert(CurCap->capturesVariable() && "Expected capture by reference.");
42       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
43     }
44   }
45 }
46 
47 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
48                                     StringRef Name, LValue AddrLV,
49                                     bool isReferenceType = false) {
50   ASTContext &Ctx = CGF.getContext();
51 
52   auto *CastedPtr = CGF.EmitScalarConversion(
53       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
54       Ctx.getPointerType(DstType), SourceLocation());
55   auto TmpAddr =
56       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
57           .getAddress();
58 
59   // If we are dealing with references we need to return the address of the
60   // reference instead of the reference of the value.
61   if (isReferenceType) {
62     QualType RefType = Ctx.getLValueReferenceType(DstType);
63     auto *RefVal = TmpAddr.getPointer();
64     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
65     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
66     CGF.EmitScalarInit(RefVal, TmpLVal);
67   }
68 
69   return TmpAddr;
70 }
71 
/// Generate the outlined function for the captured statement \a S.
///
/// The function's argument list consists of the parameters of the captured
/// declaration that precede the context parameter, then one implicit parameter
/// per field of the capture record, then the parameters that follow the
/// context parameter. The function is created with internal linkage under the
/// name reported by CapturedStmtInfo->getHelperName(). Its prologue binds each
/// per-field argument to the corresponding captured variable, VLA size or
/// 'this' before the body is emitted.
///
/// \returns the newly created function.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  // Parameters located before the context parameter are kept as they are.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  // The context parameter itself is replaced by one argument per capture; the
  // record fields and the captures are walked in lock-step.
  auto I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType())
      ArgType = Ctx.getUIntPtrType();

    // Pick the argument name: the captured variable's identifier, "this", or
    // "vla" for a captured VLA size.
    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &getContext().Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &getContext().Idents.get("vla");
    }
    // Variably modified argument types are replaced by their decayed form.
    if (ArgType->isVariablyModifiedType())
      ArgType = getContext().getVariableArrayDecayedType(ArgType);
    Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
                                             FD->getLocation(), II, ArgType));
    ++I;
  }
  // Parameters located after the context parameter are appended unchanged.
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  FunctionType::ExtInfo ExtInfo;
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
                                                    /*IsVariadic=*/false);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F = llvm::Function::Create(
      FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
      CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getLocStart());
  // Cnt indexes Args; the per-capture arguments start at the position the
  // context parameter had in the captured declaration.
  unsigned Cnt = CD->getContextParamPosition();
  I = S.captures().begin();
  for (auto *FD : RD->fields()) {
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
      ++Cnt, ++I;
      continue;
    }

    LValue ArgLVal =
        MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
                       AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // The argument carries a VLA size passed as uintptr: cast it back, load
      // it and record it as the value of the size expression.
      LValue CastedArgLVal =
          MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
                                              Args[Cnt]->getName(), ArgLVal),
                         FD->getType(), AlignmentSource::Decl);
      auto *ExprArg =
          EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    } else if (I->capturesVariable()) {
      // Captured by reference: unless the variable is itself of reference
      // type, load the reference held by the argument to obtain the
      // variable's address.
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        ArgAddr = EmitLoadOfReference(
            ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
      }
      setAddrOfLocalVar(
          Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    } else if (I->capturesVariableByCopy()) {
      // Captured by copy as uintptr (pointer captures were handled above):
      // cast the argument back to the field type.
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      setAddrOfLocalVar(I->getCapturedVar(),
                        castValueFromUintptr(*this, FD->getType(),
                                             Args[Cnt]->getName(), ArgLVal,
                                             VarTy->isReferenceType()));
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue =
          EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    }
    ++Cnt, ++I;
  }

  // Emit the body of the captured declaration and close the function.
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}
194 
195 //===----------------------------------------------------------------------===//
196 //                              OpenMP Directive Emission
197 //===----------------------------------------------------------------------===//
198 void CodeGenFunction::EmitOMPAggregateAssign(
199     Address DestAddr, Address SrcAddr, QualType OriginalType,
200     const llvm::function_ref<void(Address, Address)> &CopyGen) {
201   // Perform element-by-element initialization.
202   QualType ElementTy;
203 
204   // Drill down to the base element type on both arrays.
205   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
206   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
207   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
208 
209   auto SrcBegin = SrcAddr.getPointer();
210   auto DestBegin = DestAddr.getPointer();
211   // Cast from pointer to array type to pointer to single element.
212   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
213   // The basic structure here is a while-do loop.
214   auto BodyBB = createBasicBlock("omp.arraycpy.body");
215   auto DoneBB = createBasicBlock("omp.arraycpy.done");
216   auto IsEmpty =
217       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
218   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
219 
220   // Enter the loop body, making that address the current address.
221   auto EntryBB = Builder.GetInsertBlock();
222   EmitBlock(BodyBB);
223 
224   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
225 
226   llvm::PHINode *SrcElementPHI =
227     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
228   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
229   Address SrcElementCurrent =
230       Address(SrcElementPHI,
231               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
232 
233   llvm::PHINode *DestElementPHI =
234     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
235   DestElementPHI->addIncoming(DestBegin, EntryBB);
236   Address DestElementCurrent =
237     Address(DestElementPHI,
238             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
239 
240   // Emit copy.
241   CopyGen(DestElementCurrent, SrcElementCurrent);
242 
243   // Shift the address forward by one element.
244   auto DestElementNext = Builder.CreateConstGEP1_32(
245       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
246   auto SrcElementNext = Builder.CreateConstGEP1_32(
247       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
248   // Check whether we've reached the end.
249   auto Done =
250       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
251   Builder.CreateCondBr(Done, DoneBB, BodyBB);
252   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
253   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
254 
255   // Done.
256   EmitBlock(DoneBB, /*IsFinished=*/true);
257 }
258 
259 /// \brief Emit initialization of arrays of complex types.
260 /// \param DestAddr Address of the array.
261 /// \param Type Type of array.
262 /// \param Init Initial expression of array.
263 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
264                                  QualType Type, const Expr *Init) {
265   // Perform element-by-element initialization.
266   QualType ElementTy;
267 
268   // Drill down to the base element type on both arrays.
269   auto ArrayTy = Type->getAsArrayTypeUnsafe();
270   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
271   DestAddr =
272       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
273 
274   auto DestBegin = DestAddr.getPointer();
275   // Cast from pointer to array type to pointer to single element.
276   auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
277   // The basic structure here is a while-do loop.
278   auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
279   auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
280   auto IsEmpty =
281       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
282   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
283 
284   // Enter the loop body, making that address the current address.
285   auto EntryBB = CGF.Builder.GetInsertBlock();
286   CGF.EmitBlock(BodyBB);
287 
288   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
289 
290   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
291       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
292   DestElementPHI->addIncoming(DestBegin, EntryBB);
293   Address DestElementCurrent =
294       Address(DestElementPHI,
295               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
296 
297   // Emit copy.
298   {
299     CodeGenFunction::RunCleanupsScope InitScope(CGF);
300     CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
301                          /*IsInitializer=*/false);
302   }
303 
304   // Shift the address forward by one element.
305   auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
306       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
307   // Check whether we've reached the end.
308   auto Done =
309       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
310   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
311   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
312 
313   // Done.
314   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
315 }
316 
317 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
318                                   Address SrcAddr, const VarDecl *DestVD,
319                                   const VarDecl *SrcVD, const Expr *Copy) {
320   if (OriginalType->isArrayType()) {
321     auto *BO = dyn_cast<BinaryOperator>(Copy);
322     if (BO && BO->getOpcode() == BO_Assign) {
323       // Perform simple memcpy for simple copying.
324       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
325     } else {
326       // For arrays with complex element types perform element by element
327       // copying.
328       EmitOMPAggregateAssign(
329           DestAddr, SrcAddr, OriginalType,
330           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
331             // Working with the single array element, so have to remap
332             // destination and source variables to corresponding array
333             // elements.
334             CodeGenFunction::OMPPrivateScope Remap(*this);
335             Remap.addPrivate(DestVD, [DestElement]() -> Address {
336               return DestElement;
337             });
338             Remap.addPrivate(
339                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
340             (void)Remap.Privatize();
341             EmitIgnoredExpr(Copy);
342           });
343     }
344   } else {
345     // Remap pseudo source variable to private copy.
346     CodeGenFunction::OMPPrivateScope Remap(*this);
347     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
348     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
349     (void)Remap.Privatize();
350     // Emit copying of the whole variable.
351     EmitIgnoredExpr(Copy);
352   }
353 }
354 
355 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
356                                                 OMPPrivateScope &PrivateScope) {
357   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
358   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
359     auto IRef = C->varlist_begin();
360     auto InitsRef = C->inits().begin();
361     for (auto IInit : C->private_copies()) {
362       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
363       if (EmittedAsFirstprivate.count(OrigVD) == 0) {
364         EmittedAsFirstprivate.insert(OrigVD);
365         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
366         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
367         bool IsRegistered;
368         DeclRefExpr DRE(
369             const_cast<VarDecl *>(OrigVD),
370             /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
371                 OrigVD) != nullptr,
372             (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
373         Address OriginalAddr = EmitLValue(&DRE).getAddress();
374         QualType Type = OrigVD->getType();
375         if (Type->isArrayType()) {
376           // Emit VarDecl with copy init for arrays.
377           // Get the address of the original variable captured in current
378           // captured region.
379           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
380             auto Emission = EmitAutoVarAlloca(*VD);
381             auto *Init = VD->getInit();
382             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
383               // Perform simple memcpy.
384               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
385                                   Type);
386             } else {
387               EmitOMPAggregateAssign(
388                   Emission.getAllocatedAddress(), OriginalAddr, Type,
389                   [this, VDInit, Init](Address DestElement,
390                                        Address SrcElement) {
391                     // Clean up any temporaries needed by the initialization.
392                     RunCleanupsScope InitScope(*this);
393                     // Emit initialization for single element.
394                     setAddrOfLocalVar(VDInit, SrcElement);
395                     EmitAnyExprToMem(Init, DestElement,
396                                      Init->getType().getQualifiers(),
397                                      /*IsInitializer*/ false);
398                     LocalDeclMap.erase(VDInit);
399                   });
400             }
401             EmitAutoVarCleanups(Emission);
402             return Emission.getAllocatedAddress();
403           });
404         } else {
405           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
406             // Emit private VarDecl with copy init.
407             // Remap temp VDInit variable to the address of the original
408             // variable
409             // (for proper handling of captured global variables).
410             setAddrOfLocalVar(VDInit, OriginalAddr);
411             EmitDecl(*VD);
412             LocalDeclMap.erase(VDInit);
413             return GetAddrOfLocalVar(VD);
414           });
415         }
416         assert(IsRegistered &&
417                "firstprivate var already registered as private");
418         // Silence the warning about unused variable.
419         (void)IsRegistered;
420       }
421       ++IRef, ++InitsRef;
422     }
423   }
424   return !EmittedAsFirstprivate.empty();
425 }
426 
427 void CodeGenFunction::EmitOMPPrivateClause(
428     const OMPExecutableDirective &D,
429     CodeGenFunction::OMPPrivateScope &PrivateScope) {
430   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
431   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
432     auto IRef = C->varlist_begin();
433     for (auto IInit : C->private_copies()) {
434       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
435       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
436         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
437         bool IsRegistered =
438             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
439               // Emit private VarDecl with copy init.
440               EmitDecl(*VD);
441               return GetAddrOfLocalVar(VD);
442             });
443         assert(IsRegistered && "private var already registered as private");
444         // Silence the warning about unused variable.
445         (void)IsRegistered;
446       }
447       ++IRef;
448     }
449   }
450 }
451 
452 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
453   // threadprivate_var1 = master_threadprivate_var1;
454   // operator=(threadprivate_var2, master_threadprivate_var2);
455   // ...
456   // __kmpc_barrier(&loc, global_tid);
457   llvm::DenseSet<const VarDecl *> CopiedVars;
458   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
459   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
460     auto IRef = C->varlist_begin();
461     auto ISrcRef = C->source_exprs().begin();
462     auto IDestRef = C->destination_exprs().begin();
463     for (auto *AssignOp : C->assignment_ops()) {
464       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
465       QualType Type = VD->getType();
466       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
467 
468         // Get the address of the master variable. If we are emitting code with
469         // TLS support, the address is passed from the master as field in the
470         // captured declaration.
471         Address MasterAddr = Address::invalid();
472         if (getLangOpts().OpenMPUseTLS &&
473             getContext().getTargetInfo().isTLSSupported()) {
474           assert(CapturedStmtInfo->lookup(VD) &&
475                  "Copyin threadprivates should have been captured!");
476           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
477                           VK_LValue, (*IRef)->getExprLoc());
478           MasterAddr = EmitLValue(&DRE).getAddress();
479           LocalDeclMap.erase(VD);
480         } else {
481           MasterAddr =
482             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
483                                         : CGM.GetAddrOfGlobal(VD),
484                     getContext().getDeclAlign(VD));
485         }
486         // Get the address of the threadprivate variable.
487         Address PrivateAddr = EmitLValue(*IRef).getAddress();
488         if (CopiedVars.size() == 1) {
489           // At first check if current thread is a master thread. If it is, no
490           // need to copy data.
491           CopyBegin = createBasicBlock("copyin.not.master");
492           CopyEnd = createBasicBlock("copyin.not.master.end");
493           Builder.CreateCondBr(
494               Builder.CreateICmpNE(
495                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
496                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
497               CopyBegin, CopyEnd);
498           EmitBlock(CopyBegin);
499         }
500         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
501         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
502         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
503       }
504       ++IRef;
505       ++ISrcRef;
506       ++IDestRef;
507     }
508   }
509   if (CopyEnd) {
510     // Exit out of copying procedure for non-master thread.
511     EmitBlock(CopyEnd, /*IsFinished=*/true);
512     return true;
513   }
514   return false;
515 }
516 
517 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
518     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
519   bool HasAtLeastOneLastprivate = false;
520   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
521   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
522     HasAtLeastOneLastprivate = true;
523     auto IRef = C->varlist_begin();
524     auto IDestRef = C->destination_exprs().begin();
525     for (auto *IInit : C->private_copies()) {
526       // Keep the address of the original variable for future update at the end
527       // of the loop.
528       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
529       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
530         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
531         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
532           DeclRefExpr DRE(
533               const_cast<VarDecl *>(OrigVD),
534               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
535                   OrigVD) != nullptr,
536               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
537           return EmitLValue(&DRE).getAddress();
538         });
539         // Check if the variable is also a firstprivate: in this case IInit is
540         // not generated. Initialization of this variable will happen in codegen
541         // for 'firstprivate' clause.
542         if (IInit) {
543           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
544           bool IsRegistered =
545               PrivateScope.addPrivate(OrigVD, [&]() -> Address {
546                 // Emit private VarDecl with copy init.
547                 EmitDecl(*VD);
548                 return GetAddrOfLocalVar(VD);
549               });
550           assert(IsRegistered &&
551                  "lastprivate var already registered as private");
552           (void)IsRegistered;
553         }
554       }
555       ++IRef, ++IDestRef;
556     }
557   }
558   return HasAtLeastOneLastprivate;
559 }
560 
561 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
562     const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
563   // Emit following code:
564   // if (<IsLastIterCond>) {
565   //   orig_var1 = private_orig_var1;
566   //   ...
567   //   orig_varn = private_orig_varn;
568   // }
569   llvm::BasicBlock *ThenBB = nullptr;
570   llvm::BasicBlock *DoneBB = nullptr;
571   if (IsLastIterCond) {
572     ThenBB = createBasicBlock(".omp.lastprivate.then");
573     DoneBB = createBasicBlock(".omp.lastprivate.done");
574     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
575     EmitBlock(ThenBB);
576   }
577   llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
578   const Expr *LastIterVal = nullptr;
579   const Expr *IVExpr = nullptr;
580   const Expr *IncExpr = nullptr;
581   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
582     if (isOpenMPWorksharingDirective(D.getDirectiveKind())) {
583       LastIterVal = cast<VarDecl>(cast<DeclRefExpr>(
584                                       LoopDirective->getUpperBoundVariable())
585                                       ->getDecl())
586                         ->getAnyInitializer();
587       IVExpr = LoopDirective->getIterationVariable();
588       IncExpr = LoopDirective->getInc();
589       auto IUpdate = LoopDirective->updates().begin();
590       for (auto *E : LoopDirective->counters()) {
591         auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
592         LoopCountersAndUpdates[D] = *IUpdate;
593         ++IUpdate;
594       }
595     }
596   }
597   {
598     llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
599     bool FirstLCV = true;
600     for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
601       auto IRef = C->varlist_begin();
602       auto ISrcRef = C->source_exprs().begin();
603       auto IDestRef = C->destination_exprs().begin();
604       for (auto *AssignOp : C->assignment_ops()) {
605         auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
606         QualType Type = PrivateVD->getType();
607         auto *CanonicalVD = PrivateVD->getCanonicalDecl();
608         if (AlreadyEmittedVars.insert(CanonicalVD).second) {
609           // If lastprivate variable is a loop control variable for loop-based
610           // directive, update its value before copyin back to original
611           // variable.
612           if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) {
613             if (FirstLCV && LastIterVal) {
614               EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(),
615                                IVExpr->getType().getQualifiers(),
616                                /*IsInitializer=*/false);
617               EmitIgnoredExpr(IncExpr);
618               FirstLCV = false;
619             }
620             EmitIgnoredExpr(UpExpr);
621           }
622           auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
623           auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
624           // Get the address of the original variable.
625           Address OriginalAddr = GetAddrOfLocalVar(DestVD);
626           // Get the address of the private variable.
627           Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
628           if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
629             PrivateAddr =
630               Address(Builder.CreateLoad(PrivateAddr),
631                       getNaturalTypeAlignment(RefTy->getPointeeType()));
632           EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
633         }
634         ++IRef;
635         ++ISrcRef;
636         ++IDestRef;
637       }
638     }
639   }
640   if (IsLastIterCond) {
641     EmitBlock(DoneBB, /*IsFinished=*/true);
642   }
643 }
644 
645 void CodeGenFunction::EmitOMPReductionClauseInit(
646     const OMPExecutableDirective &D,
647     CodeGenFunction::OMPPrivateScope &PrivateScope) {
648   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
649     auto ILHS = C->lhs_exprs().begin();
650     auto IRHS = C->rhs_exprs().begin();
651     auto IPriv = C->privates().begin();
652     for (auto IRef : C->varlists()) {
653       auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
654       auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
655       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
656       if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
657         auto *Base = OASE->getBase()->IgnoreParenImpCasts();
658         while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
659           Base = TempOASE->getBase()->IgnoreParenImpCasts();
660         while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
661           Base = TempASE->getBase()->IgnoreParenImpCasts();
662         auto *DE = cast<DeclRefExpr>(Base);
663         auto *OrigVD = cast<VarDecl>(DE->getDecl());
664         auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
665         auto OASELValueUB =
666             EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
667         auto OriginalBaseLValue = EmitLValue(DE);
668         auto BaseLValue = OriginalBaseLValue;
669         auto *Zero = Builder.getInt64(/*C=*/0);
670         llvm::SmallVector<llvm::Value *, 4> Indexes;
671         Indexes.push_back(Zero);
672         auto *ItemTy =
673             OASELValueLB.getPointer()->getType()->getPointerElementType();
674         auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
675         while (Ty != ItemTy) {
676           Indexes.push_back(Zero);
677           Ty = Ty->getPointerElementType();
678         }
679         BaseLValue = MakeAddrLValue(
680             Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
681                     OASELValueLB.getAlignment()),
682             OASELValueLB.getType(), OASELValueLB.getAlignmentSource());
683         // Store the address of the original variable associated with the LHS
684         // implicit variable.
685         PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
686           return OASELValueLB.getAddress();
687         });
688         // Emit reduction copy.
689         bool IsRegistered = PrivateScope.addPrivate(
690             OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB,
691                      OriginalBaseLValue]() -> Address {
692               // Emit VarDecl with copy init for arrays.
693               // Get the address of the original variable captured in current
694               // captured region.
695               auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
696                                                  OASELValueLB.getPointer());
697               Size = Builder.CreateNUWAdd(
698                   Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
699               CodeGenFunction::OpaqueValueMapping OpaqueMap(
700                   *this, cast<OpaqueValueExpr>(
701                              getContext()
702                                  .getAsVariableArrayType(PrivateVD->getType())
703                                  ->getSizeExpr()),
704                   RValue::get(Size));
705               EmitVariablyModifiedType(PrivateVD->getType());
706               auto Emission = EmitAutoVarAlloca(*PrivateVD);
707               auto Addr = Emission.getAllocatedAddress();
708               auto *Init = PrivateVD->getInit();
709               EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
710               EmitAutoVarCleanups(Emission);
711               // Emit private VarDecl with reduction init.
712               auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
713                                                    OASELValueLB.getPointer());
714               auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
715               Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
716                   Ptr, OriginalBaseLValue.getPointer()->getType());
717               return Address(Ptr, OriginalBaseLValue.getAlignment());
718             });
719         assert(IsRegistered && "private var already registered as private");
720         // Silence the warning about unused variable.
721         (void)IsRegistered;
722         PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
723           return GetAddrOfLocalVar(PrivateVD);
724         });
725       } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
726         auto *Base = ASE->getBase()->IgnoreParenImpCasts();
727         while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
728           Base = TempASE->getBase()->IgnoreParenImpCasts();
729         auto *DE = cast<DeclRefExpr>(Base);
730         auto *OrigVD = cast<VarDecl>(DE->getDecl());
731         auto ASELValue = EmitLValue(ASE);
732         auto OriginalBaseLValue = EmitLValue(DE);
733         auto BaseLValue = OriginalBaseLValue;
734         auto *Zero = Builder.getInt64(/*C=*/0);
735         llvm::SmallVector<llvm::Value *, 4> Indexes;
736         Indexes.push_back(Zero);
737         auto *ItemTy =
738             ASELValue.getPointer()->getType()->getPointerElementType();
739         auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
740         while (Ty != ItemTy) {
741           Indexes.push_back(Zero);
742           Ty = Ty->getPointerElementType();
743         }
744         BaseLValue = MakeAddrLValue(
745             Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
746                     ASELValue.getAlignment()),
747             ASELValue.getType(), ASELValue.getAlignmentSource());
748         // Store the address of the original variable associated with the LHS
749         // implicit variable.
750         PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
751           return ASELValue.getAddress();
752         });
753         // Emit reduction copy.
754         bool IsRegistered = PrivateScope.addPrivate(
755             OrigVD, [this, PrivateVD, BaseLValue, ASELValue,
756                      OriginalBaseLValue]() -> Address {
757               // Emit private VarDecl with reduction init.
758               EmitDecl(*PrivateVD);
759               auto Addr = GetAddrOfLocalVar(PrivateVD);
760               auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
761                                                    ASELValue.getPointer());
762               auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
763               Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
764                   Ptr, OriginalBaseLValue.getPointer()->getType());
765               return Address(Ptr, OriginalBaseLValue.getAlignment());
766             });
767         assert(IsRegistered && "private var already registered as private");
768         // Silence the warning about unused variable.
769         (void)IsRegistered;
770         PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
771           return GetAddrOfLocalVar(PrivateVD);
772         });
773       } else {
774         auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
775         // Store the address of the original variable associated with the LHS
776         // implicit variable.
777         PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
778           DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
779                           CapturedStmtInfo->lookup(OrigVD) != nullptr,
780                           IRef->getType(), VK_LValue, IRef->getExprLoc());
781           return EmitLValue(&DRE).getAddress();
782         });
783         // Emit reduction copy.
784         bool IsRegistered =
785             PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
786               // Emit private VarDecl with reduction init.
787               EmitDecl(*PrivateVD);
788               return GetAddrOfLocalVar(PrivateVD);
789             });
790         assert(IsRegistered && "private var already registered as private");
791         // Silence the warning about unused variable.
792         (void)IsRegistered;
793         PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
794           return GetAddrOfLocalVar(PrivateVD);
795         });
796       }
797       ++ILHS, ++IRHS, ++IPriv;
798     }
799   }
800 }
801 
802 void CodeGenFunction::EmitOMPReductionClauseFinal(
803     const OMPExecutableDirective &D) {
804   llvm::SmallVector<const Expr *, 8> Privates;
805   llvm::SmallVector<const Expr *, 8> LHSExprs;
806   llvm::SmallVector<const Expr *, 8> RHSExprs;
807   llvm::SmallVector<const Expr *, 8> ReductionOps;
808   bool HasAtLeastOneReduction = false;
809   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
810     HasAtLeastOneReduction = true;
811     Privates.append(C->privates().begin(), C->privates().end());
812     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
813     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
814     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
815   }
816   if (HasAtLeastOneReduction) {
817     // Emit nowait reduction if nowait clause is present or directive is a
818     // parallel directive (it always has implicit barrier).
819     CGM.getOpenMPRuntime().emitReduction(
820         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
821         D.getSingleClause<OMPNowaitClause>() ||
822             isOpenMPParallelDirective(D.getDirectiveKind()) ||
823             D.getDirectiveKind() == OMPD_simd,
824         D.getDirectiveKind() == OMPD_simd);
825   }
826 }
827 
828 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
829                                            const OMPExecutableDirective &S,
830                                            OpenMPDirectiveKind InnermostKind,
831                                            const RegionCodeGenTy &CodeGen) {
832   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
833   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
834   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
835   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
836       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
837   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
838     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
839     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
840                                          /*IgnoreResultAssign*/ true);
841     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
842         CGF, NumThreads, NumThreadsClause->getLocStart());
843   }
844   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
845     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
846     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
847         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
848   }
849   const Expr *IfCond = nullptr;
850   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
851     if (C->getNameModifier() == OMPD_unknown ||
852         C->getNameModifier() == OMPD_parallel) {
853       IfCond = C->getCondition();
854       break;
855     }
856   }
857   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
858                                               CapturedVars, IfCond);
859 }
860 
861 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
862   LexicalScope Scope(*this, S.getSourceRange());
863   // Emit parallel region as a standalone region.
864   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
865     OMPPrivateScope PrivateScope(CGF);
866     bool Copyins = CGF.EmitOMPCopyinClause(S);
867     bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
868     if (Copyins || Firstprivates) {
869       // Emit implicit barrier to synchronize threads and avoid data races on
870       // initialization of firstprivate variables or propagation master's thread
871       // values of threadprivate variables to local instances of that variables
872       // of all other implicit threads.
873       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
874           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
875           /*ForceSimpleCall=*/true);
876     }
877     CGF.EmitOMPPrivateClause(S, PrivateScope);
878     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
879     (void)PrivateScope.Privatize();
880     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
881     CGF.EmitOMPReductionClauseFinal(S);
882     // Emit implicit barrier at the end of the 'parallel' directive.
883     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
884         CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
885         /*ForceSimpleCall=*/true);
886   };
887   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
888 }
889 
890 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
891                                       JumpDest LoopExit) {
892   RunCleanupsScope BodyScope(*this);
893   // Update counters values on current iteration.
894   for (auto I : D.updates()) {
895     EmitIgnoredExpr(I);
896   }
897   // Update the linear variables.
898   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
899     for (auto U : C->updates()) {
900       EmitIgnoredExpr(U);
901     }
902   }
903 
904   // On a continue in the body, jump to the end.
905   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
906   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
907   // Emit loop body.
908   EmitStmt(D.getBody());
909   // The end (updates/cleanups).
910   EmitBlock(Continue.getBlock());
911   BreakContinueStack.pop_back();
912     // TODO: Update lastprivates if the SeparateIter flag is true.
913     // This will be implemented in a follow-up OMPLastprivateClause patch, but
914     // result should be still correct without it, as we do not make these
915     // variables private yet.
916 }
917 
918 void CodeGenFunction::EmitOMPInnerLoop(
919     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
920     const Expr *IncExpr,
921     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
922     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
923   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
924 
925   // Start the loop with a block that tests the condition.
926   auto CondBlock = createBasicBlock("omp.inner.for.cond");
927   EmitBlock(CondBlock);
928   LoopStack.push(CondBlock);
929 
930   // If there are any cleanups between here and the loop-exit scope,
931   // create a block to stage a loop exit along.
932   auto ExitBlock = LoopExit.getBlock();
933   if (RequiresCleanup)
934     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
935 
936   auto LoopBody = createBasicBlock("omp.inner.for.body");
937 
938   // Emit condition.
939   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
940   if (ExitBlock != LoopExit.getBlock()) {
941     EmitBlock(ExitBlock);
942     EmitBranchThroughCleanup(LoopExit);
943   }
944 
945   EmitBlock(LoopBody);
946   incrementProfileCounter(&S);
947 
948   // Create a block for the increment.
949   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
950   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
951 
952   BodyGen(*this);
953 
954   // Emit "IV = IV + 1" and a back-edge to the condition block.
955   EmitBlock(Continue.getBlock());
956   EmitIgnoredExpr(IncExpr);
957   PostIncGen(*this);
958   BreakContinueStack.pop_back();
959   EmitBranch(CondBlock);
960   LoopStack.pop();
961   // Emit the fall-through block.
962   EmitBlock(LoopExit.getBlock());
963 }
964 
965 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
966   // Emit inits for the linear variables.
967   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
968     for (auto Init : C->inits()) {
969       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
970       auto *OrigVD = cast<VarDecl>(
971           cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
972       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
973                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
974                       VD->getInit()->getType(), VK_LValue,
975                       VD->getInit()->getExprLoc());
976       AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
977       EmitExprAsInit(&DRE, VD,
978                MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
979                      /*capturedByInit=*/false);
980       EmitAutoVarCleanups(Emission);
981     }
982     // Emit the linear steps for the linear clauses.
983     // If a step is not constant, it is pre-calculated before the loop.
984     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
985       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
986         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
987         // Emit calculation of the linear step.
988         EmitIgnoredExpr(CS);
989       }
990   }
991 }
992 
993 static void emitLinearClauseFinal(CodeGenFunction &CGF,
994                                   const OMPLoopDirective &D) {
995   // Emit the final values of the linear variables.
996   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
997     auto IC = C->varlist_begin();
998     for (auto F : C->finals()) {
999       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1000       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1001                       CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
1002                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1003       Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
1004       CodeGenFunction::OMPPrivateScope VarScope(CGF);
1005       VarScope.addPrivate(OrigVD,
1006                           [OrigAddr]() -> Address { return OrigAddr; });
1007       (void)VarScope.Privatize();
1008       CGF.EmitIgnoredExpr(F);
1009       ++IC;
1010     }
1011   }
1012 }
1013 
1014 static void emitAlignedClause(CodeGenFunction &CGF,
1015                               const OMPExecutableDirective &D) {
1016   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1017     unsigned ClauseAlignment = 0;
1018     if (auto AlignmentExpr = Clause->getAlignment()) {
1019       auto AlignmentCI =
1020           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1021       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1022     }
1023     for (auto E : Clause->varlists()) {
1024       unsigned Alignment = ClauseAlignment;
1025       if (Alignment == 0) {
1026         // OpenMP [2.8.1, Description]
1027         // If no optional parameter is specified, implementation-defined default
1028         // alignments for SIMD instructions on the target platforms are assumed.
1029         Alignment =
1030             CGF.getContext()
1031                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1032                     E->getType()->getPointeeType()))
1033                 .getQuantity();
1034       }
1035       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1036              "alignment is not power of 2");
1037       if (Alignment != 0) {
1038         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1039         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1040       }
1041     }
1042   }
1043 }
1044 
1045 static void emitPrivateLoopCounters(CodeGenFunction &CGF,
1046                                     CodeGenFunction::OMPPrivateScope &LoopScope,
1047                                     ArrayRef<Expr *> Counters,
1048                                     ArrayRef<Expr *> PrivateCounters) {
1049   auto I = PrivateCounters.begin();
1050   for (auto *E : Counters) {
1051     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1052     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1053     Address Addr = Address::invalid();
1054     (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1055       // Emit var without initialization.
1056       auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
1057       CGF.EmitAutoVarCleanups(VarEmission);
1058       Addr = VarEmission.getAllocatedAddress();
1059       return Addr;
1060     });
1061     (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
1062     ++I;
1063   }
1064 }
1065 
1066 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1067                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1068                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1069   {
1070     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1071     emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
1072                             S.private_counters());
1073     (void)PreCondScope.Privatize();
1074     // Get initial values of real counters.
1075     for (auto I : S.inits()) {
1076       CGF.EmitIgnoredExpr(I);
1077     }
1078   }
1079   // Check that loop is executed at least one time.
1080   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1081 }
1082 
1083 static void
1084 emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
1085                       CodeGenFunction::OMPPrivateScope &PrivateScope) {
1086   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1087     auto CurPrivate = C->privates().begin();
1088     for (auto *E : C->varlists()) {
1089       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1090       auto *PrivateVD =
1091           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1092       bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1093         // Emit private VarDecl with copy init.
1094         CGF.EmitVarDecl(*PrivateVD);
1095         return CGF.GetAddrOfLocalVar(PrivateVD);
1096       });
1097       assert(IsRegistered && "linear var already registered as private");
1098       // Silence the warning about unused variable.
1099       (void)IsRegistered;
1100       ++CurPrivate;
1101     }
1102   }
1103 }
1104 
1105 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1106                                      const OMPExecutableDirective &D) {
1107   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1108     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1109                                  /*ignoreResult=*/true);
1110     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1111     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1112     // In presence of finite 'safelen', it may be unsafe to mark all
1113     // the memory instructions parallel, because loop-carried
1114     // dependences of 'safelen' iterations are possible.
1115     CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1116   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1117     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1118                                  /*ignoreResult=*/true);
1119     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1120     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1121     // In presence of finite 'safelen', it may be unsafe to mark all
1122     // the memory instructions parallel, because loop-carried
1123     // dependences of 'safelen' iterations are possible.
1124     CGF.LoopStack.setParallel(false);
1125   }
1126 }
1127 
1128 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
1129   // Walk clauses and process safelen/lastprivate.
1130   LoopStack.setParallel();
1131   LoopStack.setVectorizeEnable(true);
1132   emitSimdlenSafelenClause(*this, D);
1133 }
1134 
1135 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
1136   auto IC = D.counters().begin();
1137   for (auto F : D.finals()) {
1138     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1139     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
1140       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1141                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1142                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1143       Address OrigAddr = EmitLValue(&DRE).getAddress();
1144       OMPPrivateScope VarScope(*this);
1145       VarScope.addPrivate(OrigVD,
1146                           [OrigAddr]() -> Address { return OrigAddr; });
1147       (void)VarScope.Privatize();
1148       EmitIgnoredExpr(F);
1149     }
1150     ++IC;
1151   }
1152   emitLinearClauseFinal(*this, D);
1153 }
1154 
1155 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1156   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1157     // if (PreCond) {
1158     //   for (IV in 0..LastIteration) BODY;
1159     //   <Final counter/linear vars updates>;
1160     // }
1161     //
1162 
1163     // Emit: if (PreCond) - begin.
1164     // If the condition constant folds and can be elided, avoid emitting the
1165     // whole loop.
1166     bool CondConstant;
1167     llvm::BasicBlock *ContBlock = nullptr;
1168     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1169       if (!CondConstant)
1170         return;
1171     } else {
1172       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1173       ContBlock = CGF.createBasicBlock("simd.if.end");
1174       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1175                   CGF.getProfileCount(&S));
1176       CGF.EmitBlock(ThenBlock);
1177       CGF.incrementProfileCounter(&S);
1178     }
1179 
1180     // Emit the loop iteration variable.
1181     const Expr *IVExpr = S.getIterationVariable();
1182     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1183     CGF.EmitVarDecl(*IVDecl);
1184     CGF.EmitIgnoredExpr(S.getInit());
1185 
1186     // Emit the iterations count variable.
1187     // If it is not a variable, Sema decided to calculate iterations count on
1188     // each iteration (e.g., it is foldable into a constant).
1189     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1190       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1191       // Emit calculation of the iterations count.
1192       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1193     }
1194 
1195     CGF.EmitOMPSimdInit(S);
1196 
1197     emitAlignedClause(CGF, S);
1198     CGF.EmitOMPLinearClauseInit(S);
1199     bool HasLastprivateClause;
1200     {
1201       OMPPrivateScope LoopScope(CGF);
1202       emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
1203                               S.private_counters());
1204       emitPrivateLinearVars(CGF, S, LoopScope);
1205       CGF.EmitOMPPrivateClause(S, LoopScope);
1206       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1207       HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1208       (void)LoopScope.Privatize();
1209       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1210                            S.getInc(),
1211                            [&S](CodeGenFunction &CGF) {
1212                              CGF.EmitOMPLoopBody(S, JumpDest());
1213                              CGF.EmitStopPoint(&S);
1214                            },
1215                            [](CodeGenFunction &) {});
1216       // Emit final copy of the lastprivate variables at the end of loops.
1217       if (HasLastprivateClause) {
1218         CGF.EmitOMPLastprivateClauseFinal(S);
1219       }
1220       CGF.EmitOMPReductionClauseFinal(S);
1221     }
1222     CGF.EmitOMPSimdFinal(S);
1223     // Emit: if (PreCond) - end.
1224     if (ContBlock) {
1225       CGF.EmitBranch(ContBlock);
1226       CGF.EmitBlock(ContBlock, true);
1227     }
1228   };
1229   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1230 }
1231 
1232 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
1233                                           const OMPLoopDirective &S,
1234                                           OMPPrivateScope &LoopScope,
1235                                           bool Ordered, Address LB,
1236                                           Address UB, Address ST,
1237                                           Address IL, llvm::Value *Chunk) {
1238   auto &RT = CGM.getOpenMPRuntime();
1239 
1240   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1241   const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
1242 
1243   assert((Ordered ||
1244           !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
1245          "static non-chunked schedule does not need outer loop");
1246 
1247   // Emit outer loop.
1248   //
1249   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1250   // When schedule(dynamic,chunk_size) is specified, the iterations are
1251   // distributed to threads in the team in chunks as the threads request them.
1252   // Each thread executes a chunk of iterations, then requests another chunk,
1253   // until no chunks remain to be distributed. Each chunk contains chunk_size
1254   // iterations, except for the last chunk to be distributed, which may have
1255   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1256   //
1257   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1258   // to threads in the team in chunks as the executing threads request them.
1259   // Each thread executes a chunk of iterations, then requests another chunk,
1260   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1261   // each chunk is proportional to the number of unassigned iterations divided
1262   // by the number of threads in the team, decreasing to 1. For a chunk_size
1263   // with value k (greater than 1), the size of each chunk is determined in the
1264   // same way, with the restriction that the chunks do not contain fewer than k
1265   // iterations (except for the last chunk to be assigned, which may have fewer
1266   // than k iterations).
1267   //
1268   // When schedule(auto) is specified, the decision regarding scheduling is
1269   // delegated to the compiler and/or runtime system. The programmer gives the
1270   // implementation the freedom to choose any possible mapping of iterations to
1271   // threads in the team.
1272   //
1273   // When schedule(runtime) is specified, the decision regarding scheduling is
1274   // deferred until run time, and the schedule and chunk size are taken from the
1275   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1276   // implementation defined
1277   //
1278   // while(__kmpc_dispatch_next(&LB, &UB)) {
1279   //   idx = LB;
1280   //   while (idx <= UB) { BODY; ++idx;
1281   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1282   //   } // inner loop
1283   // }
1284   //
1285   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1286   // When schedule(static, chunk_size) is specified, iterations are divided into
1287   // chunks of size chunk_size, and the chunks are assigned to the threads in
1288   // the team in a round-robin fashion in the order of the thread number.
1289   //
1290   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1291   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1292   //   LB = LB + ST;
1293   //   UB = UB + ST;
1294   // }
1295   //
1296 
1297   const Expr *IVExpr = S.getIterationVariable();
1298   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1299   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1300 
1301   if (DynamicOrOrdered) {
1302     llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
1303     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
1304                            IVSize, IVSigned, Ordered, UBVal, Chunk);
1305   } else {
1306     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
1307                          IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
1308   }
1309 
1310   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1311 
1312   // Start the loop with a block that tests the condition.
1313   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1314   EmitBlock(CondBlock);
1315   LoopStack.push(CondBlock);
1316 
1317   llvm::Value *BoolCondVal = nullptr;
1318   if (!DynamicOrOrdered) {
1319     // UB = min(UB, GlobalUB)
1320     EmitIgnoredExpr(S.getEnsureUpperBound());
1321     // IV = LB
1322     EmitIgnoredExpr(S.getInit());
1323     // IV < UB
1324     BoolCondVal = EvaluateExprAsBool(S.getCond());
1325   } else {
1326     BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
1327                                     IL, LB, UB, ST);
1328   }
1329 
1330   // If there are any cleanups between here and the loop-exit scope,
1331   // create a block to stage a loop exit along.
1332   auto ExitBlock = LoopExit.getBlock();
1333   if (LoopScope.requiresCleanups())
1334     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1335 
1336   auto LoopBody = createBasicBlock("omp.dispatch.body");
1337   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1338   if (ExitBlock != LoopExit.getBlock()) {
1339     EmitBlock(ExitBlock);
1340     EmitBranchThroughCleanup(LoopExit);
1341   }
1342   EmitBlock(LoopBody);
1343 
1344   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1345   // LB for loop condition and emitted it above).
1346   if (DynamicOrOrdered)
1347     EmitIgnoredExpr(S.getInit());
1348 
1349   // Create a block for the increment.
1350   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1351   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1352 
1353   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1354   // with dynamic/guided scheduling and without ordered clause.
1355   if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
1356     LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
1357                            ScheduleKind == OMPC_SCHEDULE_guided) &&
1358                           !Ordered);
1359   } else {
1360     EmitOMPSimdInit(S);
1361   }
1362 
1363   SourceLocation Loc = S.getLocStart();
1364   EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
1365                    [&S, LoopExit](CodeGenFunction &CGF) {
1366                      CGF.EmitOMPLoopBody(S, LoopExit);
1367                      CGF.EmitStopPoint(&S);
1368                    },
1369                    [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
1370                      if (Ordered) {
1371                        CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
1372                            CGF, Loc, IVSize, IVSigned);
1373                      }
1374                    });
1375 
1376   EmitBlock(Continue.getBlock());
1377   BreakContinueStack.pop_back();
1378   if (!DynamicOrOrdered) {
1379     // Emit "LB = LB + Stride", "UB = UB + Stride".
1380     EmitIgnoredExpr(S.getNextLowerBound());
1381     EmitIgnoredExpr(S.getNextUpperBound());
1382   }
1383 
1384   EmitBranch(CondBlock);
1385   LoopStack.pop();
1386   // Emit the fall-through block.
1387   EmitBlock(LoopExit.getBlock());
1388 
1389   // Tell the runtime we are done.
1390   if (!DynamicOrOrdered)
1391     RT.emitForStaticFinish(*this, S.getLocEnd());
1392 }
1393 
1394 /// \brief Emit a helper variable and return corresponding lvalue.
1395 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1396                                const DeclRefExpr *Helper) {
1397   auto VDecl = cast<VarDecl>(Helper->getDecl());
1398   CGF.EmitVarDecl(*VDecl);
1399   return CGF.EmitLValue(Helper);
1400 }
1401 
1402 static std::pair<llvm::Value * /*Chunk*/, OpenMPScheduleClauseKind>
1403 emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,
1404                    bool OuterRegion) {
1405   // Detect the loop schedule kind and chunk.
1406   auto ScheduleKind = OMPC_SCHEDULE_unknown;
1407   llvm::Value *Chunk = nullptr;
1408   if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
1409     ScheduleKind = C->getScheduleKind();
1410     if (const auto *Ch = C->getChunkSize()) {
1411       if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) {
1412         if (OuterRegion) {
1413           const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl());
1414           CGF.EmitVarDecl(*ImpVar);
1415           CGF.EmitStoreThroughLValue(
1416               CGF.EmitAnyExpr(Ch),
1417               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar),
1418                                  ImpVar->getType()));
1419         } else {
1420           Ch = ImpRef;
1421         }
1422       }
1423       if (!C->getHelperChunkSize() || !OuterRegion) {
1424         Chunk = CGF.EmitScalarExpr(Ch);
1425         Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(),
1426                                          S.getIterationVariable()->getType(),
1427                                          S.getLocStart());
1428       }
1429     }
1430   }
1431   return std::make_pair(Chunk, ScheduleKind);
1432 }
1433 
1434 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
1435   // Emit the loop iteration variable.
1436   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
1437   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
1438   EmitVarDecl(*IVDecl);
1439 
1440   // Emit the iterations count variable.
1441   // If it is not a variable, Sema decided to calculate iterations count on each
1442   // iteration (e.g., it is foldable into a constant).
1443   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1444     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1445     // Emit calculation of the iterations count.
1446     EmitIgnoredExpr(S.getCalcLastIteration());
1447   }
1448 
1449   auto &RT = CGM.getOpenMPRuntime();
1450 
1451   bool HasLastprivateClause;
1452   // Check pre-condition.
1453   {
1454     // Skip the entire loop if we don't meet the precondition.
1455     // If the condition constant folds and can be elided, avoid emitting the
1456     // whole loop.
1457     bool CondConstant;
1458     llvm::BasicBlock *ContBlock = nullptr;
1459     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1460       if (!CondConstant)
1461         return false;
1462     } else {
1463       auto *ThenBlock = createBasicBlock("omp.precond.then");
1464       ContBlock = createBasicBlock("omp.precond.end");
1465       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
1466                   getProfileCount(&S));
1467       EmitBlock(ThenBlock);
1468       incrementProfileCounter(&S);
1469     }
1470 
1471     emitAlignedClause(*this, S);
1472     EmitOMPLinearClauseInit(S);
1473     // Emit 'then' code.
1474     {
1475       // Emit helper vars inits.
1476       LValue LB =
1477           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
1478       LValue UB =
1479           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
1480       LValue ST =
1481           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
1482       LValue IL =
1483           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
1484 
1485       OMPPrivateScope LoopScope(*this);
1486       if (EmitOMPFirstprivateClause(S, LoopScope)) {
1487         // Emit implicit barrier to synchronize threads and avoid data races on
1488         // initialization of firstprivate variables.
1489         CGM.getOpenMPRuntime().emitBarrierCall(
1490             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1491             /*ForceSimpleCall=*/true);
1492       }
1493       EmitOMPPrivateClause(S, LoopScope);
1494       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
1495       EmitOMPReductionClauseInit(S, LoopScope);
1496       emitPrivateLoopCounters(*this, LoopScope, S.counters(),
1497                               S.private_counters());
1498       emitPrivateLinearVars(*this, S, LoopScope);
1499       (void)LoopScope.Privatize();
1500 
1501       // Detect the loop schedule kind and chunk.
1502       llvm::Value *Chunk;
1503       OpenMPScheduleClauseKind ScheduleKind;
1504       auto ScheduleInfo =
1505           emitScheduleClause(*this, S, /*OuterRegion=*/false);
1506       Chunk = ScheduleInfo.first;
1507       ScheduleKind = ScheduleInfo.second;
1508       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1509       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1510       const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
1511       if (RT.isStaticNonchunked(ScheduleKind,
1512                                 /* Chunked */ Chunk != nullptr) &&
1513           !Ordered) {
1514         if (isOpenMPSimdDirective(S.getDirectiveKind())) {
1515           EmitOMPSimdInit(S);
1516         }
1517         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1518         // When no chunk_size is specified, the iteration space is divided into
1519         // chunks that are approximately equal in size, and at most one chunk is
1520         // distributed to each thread. Note that the size of the chunks is
1521         // unspecified in this case.
1522         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
1523                              IVSize, IVSigned, Ordered,
1524                              IL.getAddress(), LB.getAddress(),
1525                              UB.getAddress(), ST.getAddress());
1526         auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
1527         // UB = min(UB, GlobalUB);
1528         EmitIgnoredExpr(S.getEnsureUpperBound());
1529         // IV = LB;
1530         EmitIgnoredExpr(S.getInit());
1531         // while (idx <= UB) { BODY; ++idx; }
1532         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1533                          S.getInc(),
1534                          [&S, LoopExit](CodeGenFunction &CGF) {
1535                            CGF.EmitOMPLoopBody(S, LoopExit);
1536                            CGF.EmitStopPoint(&S);
1537                          },
1538                          [](CodeGenFunction &) {});
1539         EmitBlock(LoopExit.getBlock());
1540         // Tell the runtime we are done.
1541         RT.emitForStaticFinish(*this, S.getLocStart());
1542       } else {
1543         // Emit the outer loop, which requests its work chunk [LB..UB] from
1544         // runtime and runs the inner loop to process it.
1545         EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered,
1546                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
1547                             IL.getAddress(), Chunk);
1548       }
1549       EmitOMPReductionClauseFinal(S);
1550       // Emit final copy of the lastprivate variables if IsLastIter != 0.
1551       if (HasLastprivateClause)
1552         EmitOMPLastprivateClauseFinal(
1553             S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
1554     }
1555     if (isOpenMPSimdDirective(S.getDirectiveKind())) {
1556       EmitOMPSimdFinal(S);
1557     }
1558     // We're now done with the loop, so jump to the continuation block.
1559     if (ContBlock) {
1560       EmitBranch(ContBlock);
1561       EmitBlock(ContBlock, true);
1562     }
1563   }
1564   return HasLastprivateClause;
1565 }
1566 
1567 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
1568   LexicalScope Scope(*this, S.getSourceRange());
1569   bool HasLastprivates = false;
1570   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
1571     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
1572   };
1573   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
1574                                               S.hasCancel());
1575 
1576   // Emit an implicit barrier at the end.
1577   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
1578     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
1579   }
1580 }
1581 
1582 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
1583   LexicalScope Scope(*this, S.getSourceRange());
1584   bool HasLastprivates = false;
1585   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
1586     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
1587   };
1588   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1589 
1590   // Emit an implicit barrier at the end.
1591   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
1592     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
1593   }
1594 }
1595 
1596 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
1597                                 const Twine &Name,
1598                                 llvm::Value *Init = nullptr) {
1599   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
1600   if (Init)
1601     CGF.EmitScalarInit(Init, LVal);
1602   return LVal;
1603 }
1604 
1605 OpenMPDirectiveKind
1606 CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
1607   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
1608   auto *CS = dyn_cast<CompoundStmt>(Stmt);
1609   if (CS && CS->size() > 1) {
1610     bool HasLastprivates = false;
1611     auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) {
1612       auto &C = CGF.CGM.getContext();
1613       auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1614       // Emit helper vars inits.
1615       LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
1616                                     CGF.Builder.getInt32(0));
1617       auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
1618       LValue UB =
1619           createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
1620       LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
1621                                     CGF.Builder.getInt32(1));
1622       LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
1623                                     CGF.Builder.getInt32(0));
1624       // Loop counter.
1625       LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
1626       OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
1627       CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
1628       OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
1629       CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
1630       // Generate condition for loop.
1631       BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
1632                           OK_Ordinary, S.getLocStart(),
1633                           /*fpContractable=*/false);
1634       // Increment for loop counter.
1635       UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
1636                         OK_Ordinary, S.getLocStart());
1637       auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
1638         // Iterate through all sections and emit a switch construct:
1639         // switch (IV) {
1640         //   case 0:
1641         //     <SectionStmt[0]>;
1642         //     break;
1643         // ...
1644         //   case <NumSection> - 1:
1645         //     <SectionStmt[<NumSection> - 1]>;
1646         //     break;
1647         // }
1648         // .omp.sections.exit:
1649         auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
1650         auto *SwitchStmt = CGF.Builder.CreateSwitch(
1651             CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
1652             CS->size());
1653         unsigned CaseNumber = 0;
1654         for (auto *SubStmt : CS->children()) {
1655           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
1656           CGF.EmitBlock(CaseBB);
1657           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
1658           CGF.EmitStmt(SubStmt);
1659           CGF.EmitBranch(ExitBB);
1660           ++CaseNumber;
1661         }
1662         CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
1663       };
1664 
1665       CodeGenFunction::OMPPrivateScope LoopScope(CGF);
1666       if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
1667         // Emit implicit barrier to synchronize threads and avoid data races on
1668         // initialization of firstprivate variables.
1669         CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1670             CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1671             /*ForceSimpleCall=*/true);
1672       }
1673       CGF.EmitOMPPrivateClause(S, LoopScope);
1674       HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1675       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1676       (void)LoopScope.Privatize();
1677 
1678       // Emit static non-chunked loop.
1679       CGF.CGM.getOpenMPRuntime().emitForStaticInit(
1680           CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
1681           /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
1682           LB.getAddress(), UB.getAddress(), ST.getAddress());
1683       // UB = min(UB, GlobalUB);
1684       auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
1685       auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
1686           CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
1687       CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
1688       // IV = LB;
1689       CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
1690       // while (idx <= UB) { BODY; ++idx; }
1691       CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
1692                            [](CodeGenFunction &) {});
1693       // Tell the runtime we are done.
1694       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
1695       CGF.EmitOMPReductionClauseFinal(S);
1696 
1697       // Emit final copy of the lastprivate variables if IsLastIter != 0.
1698       if (HasLastprivates)
1699         CGF.EmitOMPLastprivateClauseFinal(
1700             S, CGF.Builder.CreateIsNotNull(
1701                    CGF.EmitLoadOfScalar(IL, S.getLocStart())));
1702     };
1703 
1704     bool HasCancel = false;
1705     if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
1706       HasCancel = OSD->hasCancel();
1707     else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
1708       HasCancel = OPSD->hasCancel();
1709     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
1710                                                 HasCancel);
1711     // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
1712     // clause. Otherwise the barrier will be generated by the codegen for the
1713     // directive.
1714     if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
1715       // Emit implicit barrier to synchronize threads and avoid data races on
1716       // initialization of firstprivate variables.
1717       CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
1718                                              OMPD_unknown);
1719     }
1720     return OMPD_sections;
1721   }
1722   // If only one section is found - no need to generate loop, emit as a single
1723   // region.
1724   bool HasFirstprivates;
1725   // No need to generate reductions for sections with single section region, we
1726   // can use original shared variables for all operations.
1727   bool HasReductions = S.hasClausesOfKind<OMPReductionClause>();
1728   // No need to generate lastprivates for sections with single section region,
1729   // we can use original shared variable for all calculations with barrier at
1730   // the end of the sections.
1731   bool HasLastprivates = S.hasClausesOfKind<OMPLastprivateClause>();
1732   auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) {
1733     CodeGenFunction::OMPPrivateScope SingleScope(CGF);
1734     HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
1735     CGF.EmitOMPPrivateClause(S, SingleScope);
1736     (void)SingleScope.Privatize();
1737 
1738     CGF.EmitStmt(Stmt);
1739   };
1740   CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
1741                                           llvm::None, llvm::None, llvm::None,
1742                                           llvm::None);
1743   // Emit barrier for firstprivates, lastprivates or reductions only if
1744   // 'sections' directive has 'nowait' clause. Otherwise the barrier will be
1745   // generated by the codegen for the directive.
1746   if ((HasFirstprivates || HasLastprivates || HasReductions) &&
1747       S.getSingleClause<OMPNowaitClause>()) {
1748     // Emit implicit barrier to synchronize threads and avoid data races on
1749     // initialization of firstprivate variables.
1750     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown,
1751                                            /*EmitChecks=*/false,
1752                                            /*ForceSimpleCall=*/true);
1753   }
1754   return OMPD_single;
1755 }
1756 
1757 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
1758   LexicalScope Scope(*this, S.getSourceRange());
1759   OpenMPDirectiveKind EmittedAs = EmitSections(S);
1760   // Emit an implicit barrier at the end.
1761   if (!S.getSingleClause<OMPNowaitClause>()) {
1762     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
1763   }
1764 }
1765 
1766 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
1767   LexicalScope Scope(*this, S.getSourceRange());
1768   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1769     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1770     CGF.EnsureInsertPoint();
1771   };
1772   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
1773                                               S.hasCancel());
1774 }
1775 
1776 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
1777   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
1778   llvm::SmallVector<const Expr *, 8> DestExprs;
1779   llvm::SmallVector<const Expr *, 8> SrcExprs;
1780   llvm::SmallVector<const Expr *, 8> AssignmentOps;
1781   // Check if there are any 'copyprivate' clauses associated with this
1782   // 'single'
1783   // construct.
1784   // Build a list of copyprivate variables along with helper expressions
1785   // (<source>, <destination>, <destination>=<source> expressions)
1786   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
1787     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
1788     DestExprs.append(C->destination_exprs().begin(),
1789                      C->destination_exprs().end());
1790     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
1791     AssignmentOps.append(C->assignment_ops().begin(),
1792                          C->assignment_ops().end());
1793   }
1794   LexicalScope Scope(*this, S.getSourceRange());
1795   // Emit code for 'single' region along with 'copyprivate' clauses
1796   bool HasFirstprivates;
1797   auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
1798     CodeGenFunction::OMPPrivateScope SingleScope(CGF);
1799     HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
1800     CGF.EmitOMPPrivateClause(S, SingleScope);
1801     (void)SingleScope.Privatize();
1802 
1803     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1804     CGF.EnsureInsertPoint();
1805   };
1806   CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
1807                                           CopyprivateVars, DestExprs, SrcExprs,
1808                                           AssignmentOps);
1809   // Emit an implicit barrier at the end (to avoid data race on firstprivate
1810   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
1811   if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
1812       CopyprivateVars.empty()) {
1813     CGM.getOpenMPRuntime().emitBarrierCall(
1814         *this, S.getLocStart(),
1815         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
1816   }
1817 }
1818 
1819 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
1820   LexicalScope Scope(*this, S.getSourceRange());
1821   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1822     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1823     CGF.EnsureInsertPoint();
1824   };
1825   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
1826 }
1827 
1828 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
1829   LexicalScope Scope(*this, S.getSourceRange());
1830   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1831     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1832     CGF.EnsureInsertPoint();
1833   };
1834   CGM.getOpenMPRuntime().emitCriticalRegion(
1835       *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
1836 }
1837 
1838 void CodeGenFunction::EmitOMPParallelForDirective(
1839     const OMPParallelForDirective &S) {
1840   // Emit directive as a combined directive that consists of two implicit
1841   // directives: 'parallel' with 'for' directive.
1842   LexicalScope Scope(*this, S.getSourceRange());
1843   (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
1844   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1845     CGF.EmitOMPWorksharingLoop(S);
1846     // Emit implicit barrier at the end of parallel region, but this barrier
1847     // is at the end of 'for' directive, so emit it as the implicit barrier for
1848     // this 'for' directive.
1849     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1850         CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false,
1851         /*ForceSimpleCall=*/true);
1852   };
1853   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
1854 }
1855 
1856 void CodeGenFunction::EmitOMPParallelForSimdDirective(
1857     const OMPParallelForSimdDirective &S) {
1858   // Emit directive as a combined directive that consists of two implicit
1859   // directives: 'parallel' with 'for' directive.
1860   LexicalScope Scope(*this, S.getSourceRange());
1861   (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
1862   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1863     CGF.EmitOMPWorksharingLoop(S);
1864     // Emit implicit barrier at the end of parallel region, but this barrier
1865     // is at the end of 'for' directive, so emit it as the implicit barrier for
1866     // this 'for' directive.
1867     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1868         CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false,
1869         /*ForceSimpleCall=*/true);
1870   };
1871   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
1872 }
1873 
1874 void CodeGenFunction::EmitOMPParallelSectionsDirective(
1875     const OMPParallelSectionsDirective &S) {
1876   // Emit directive as a combined directive that consists of two implicit
1877   // directives: 'parallel' with 'sections' directive.
1878   LexicalScope Scope(*this, S.getSourceRange());
1879   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1880     (void)CGF.EmitSections(S);
1881     // Emit implicit barrier at the end of parallel region.
1882     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1883         CGF, S.getLocStart(), OMPD_parallel, /*EmitChecks=*/false,
1884         /*ForceSimpleCall=*/true);
1885   };
1886   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
1887 }
1888 
1889 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
1890   // Emit outlined function for task construct.
1891   LexicalScope Scope(*this, S.getSourceRange());
1892   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
1893   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
1894   auto *I = CS->getCapturedDecl()->param_begin();
1895   auto *PartId = std::next(I);
1896   // The first function argument for tasks is a thread id, the second one is a
1897   // part id (0 for tied tasks, >=0 for untied task).
1898   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
1899   // Get list of private variables.
1900   llvm::SmallVector<const Expr *, 8> PrivateVars;
1901   llvm::SmallVector<const Expr *, 8> PrivateCopies;
1902   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
1903     auto IRef = C->varlist_begin();
1904     for (auto *IInit : C->private_copies()) {
1905       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1906       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
1907         PrivateVars.push_back(*IRef);
1908         PrivateCopies.push_back(IInit);
1909       }
1910       ++IRef;
1911     }
1912   }
1913   EmittedAsPrivate.clear();
1914   // Get list of firstprivate variables.
1915   llvm::SmallVector<const Expr *, 8> FirstprivateVars;
1916   llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
1917   llvm::SmallVector<const Expr *, 8> FirstprivateInits;
1918   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1919     auto IRef = C->varlist_begin();
1920     auto IElemInitRef = C->inits().begin();
1921     for (auto *IInit : C->private_copies()) {
1922       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1923       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
1924         FirstprivateVars.push_back(*IRef);
1925         FirstprivateCopies.push_back(IInit);
1926         FirstprivateInits.push_back(*IElemInitRef);
1927       }
1928       ++IRef, ++IElemInitRef;
1929     }
1930   }
1931   // Build list of dependences.
1932   llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
1933       Dependences;
1934   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
1935     for (auto *IRef : C->varlists()) {
1936       Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
1937     }
1938   }
1939   auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
1940       CodeGenFunction &CGF) {
1941     // Set proper addresses for generated private copies.
1942     auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
1943     OMPPrivateScope Scope(CGF);
1944     if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
1945       auto *CopyFn = CGF.Builder.CreateLoad(
1946           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
1947       auto *PrivatesPtr = CGF.Builder.CreateLoad(
1948           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
1949       // Map privates.
1950       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16>
1951           PrivatePtrs;
1952       llvm::SmallVector<llvm::Value *, 16> CallArgs;
1953       CallArgs.push_back(PrivatesPtr);
1954       for (auto *E : PrivateVars) {
1955         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1956         Address PrivatePtr =
1957             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
1958         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
1959         CallArgs.push_back(PrivatePtr.getPointer());
1960       }
1961       for (auto *E : FirstprivateVars) {
1962         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1963         Address PrivatePtr =
1964             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
1965         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
1966         CallArgs.push_back(PrivatePtr.getPointer());
1967       }
1968       CGF.EmitRuntimeCall(CopyFn, CallArgs);
1969       for (auto &&Pair : PrivatePtrs) {
1970         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
1971                             CGF.getContext().getDeclAlign(Pair.first));
1972         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
1973       }
1974     }
1975     (void)Scope.Privatize();
1976     if (*PartId) {
1977       // TODO: emit code for untied tasks.
1978     }
1979     CGF.EmitStmt(CS->getCapturedStmt());
1980   };
1981   auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
1982       S, *I, OMPD_task, CodeGen);
1983   // Check if we should emit tied or untied task.
1984   bool Tied = !S.getSingleClause<OMPUntiedClause>();
1985   // Check if the task is final
1986   llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
1987   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
1988     // If the condition constant folds and can be elided, try to avoid emitting
1989     // the condition and the dead arm of the if/else.
1990     auto *Cond = Clause->getCondition();
1991     bool CondConstant;
1992     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
1993       Final.setInt(CondConstant);
1994     else
1995       Final.setPointer(EvaluateExprAsBool(Cond));
1996   } else {
1997     // By default the task is not final.
1998     Final.setInt(/*IntVal=*/false);
1999   }
2000   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2001   const Expr *IfCond = nullptr;
2002   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2003     if (C->getNameModifier() == OMPD_unknown ||
2004         C->getNameModifier() == OMPD_task) {
2005       IfCond = C->getCondition();
2006       break;
2007     }
2008   }
2009   CGM.getOpenMPRuntime().emitTaskCall(
2010       *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
2011       CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
2012       FirstprivateCopies, FirstprivateInits, Dependences);
2013 }
2014 
2015 void CodeGenFunction::EmitOMPTaskyieldDirective(
2016     const OMPTaskyieldDirective &S) {
2017   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2018 }
2019 
2020 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2021   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2022 }
2023 
2024 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2025   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2026 }
2027 
2028 void CodeGenFunction::EmitOMPTaskgroupDirective(
2029     const OMPTaskgroupDirective &S) {
2030   LexicalScope Scope(*this, S.getSourceRange());
2031   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2032     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2033     CGF.EnsureInsertPoint();
2034   };
2035   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2036 }
2037 
2038 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2039   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2040     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2041       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2042                                 FlushClause->varlist_end());
2043     }
2044     return llvm::None;
2045   }(), S.getLocStart());
2046 }
2047 
2048 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
2049                                                    const CapturedStmt *S) {
2050   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
2051   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
2052   CGF.CapturedStmtInfo = &CapStmtInfo;
2053   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
2054   Fn->addFnAttr(llvm::Attribute::NoInline);
2055   return Fn;
2056 }
2057 
2058 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
2059   LexicalScope Scope(*this, S.getSourceRange());
2060   auto *C = S.getSingleClause<OMPSIMDClause>();
2061   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
2062     if (C) {
2063       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2064       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2065       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
2066       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
2067       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
2068     } else {
2069       CGF.EmitStmt(
2070           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2071     }
2072     CGF.EnsureInsertPoint();
2073   };
2074   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
2075 }
2076 
2077 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
2078                                          QualType SrcType, QualType DestType,
2079                                          SourceLocation Loc) {
2080   assert(CGF.hasScalarEvaluationKind(DestType) &&
2081          "DestType must have scalar evaluation kind.");
2082   assert(!Val.isAggregate() && "Must be a scalar or complex.");
2083   return Val.isScalar()
2084              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
2085                                         Loc)
2086              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
2087                                                  DestType, Loc);
2088 }
2089 
2090 static CodeGenFunction::ComplexPairTy
2091 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
2092                       QualType DestType, SourceLocation Loc) {
2093   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
2094          "DestType must have complex evaluation kind.");
2095   CodeGenFunction::ComplexPairTy ComplexVal;
2096   if (Val.isScalar()) {
2097     // Convert the input element to the element type of the complex.
2098     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2099     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
2100                                               DestElementType, Loc);
2101     ComplexVal = CodeGenFunction::ComplexPairTy(
2102         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
2103   } else {
2104     assert(Val.isComplex() && "Must be a scalar or complex.");
2105     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
2106     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2107     ComplexVal.first = CGF.EmitScalarConversion(
2108         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
2109     ComplexVal.second = CGF.EmitScalarConversion(
2110         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
2111   }
2112   return ComplexVal;
2113 }
2114 
2115 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
2116                                   LValue LVal, RValue RVal) {
2117   if (LVal.isGlobalReg()) {
2118     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
2119   } else {
2120     CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
2121                                              : llvm::Monotonic,
2122                         LVal.isVolatile(), /*IsInit=*/false);
2123   }
2124 }
2125 
2126 static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal,
2127                             QualType RValTy, SourceLocation Loc) {
2128   switch (CGF.getEvaluationKind(LVal.getType())) {
2129   case TEK_Scalar:
2130     CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue(
2131                                    CGF, RVal, RValTy, LVal.getType(), Loc)),
2132                                LVal);
2133     break;
2134   case TEK_Complex:
2135     CGF.EmitStoreOfComplex(
2136         convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal,
2137         /*isInit=*/false);
2138     break;
2139   case TEK_Aggregate:
2140     llvm_unreachable("Must be a scalar or complex.");
2141   }
2142 }
2143 
2144 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
2145                                   const Expr *X, const Expr *V,
2146                                   SourceLocation Loc) {
2147   // v = x;
2148   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
2149   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
2150   LValue XLValue = CGF.EmitLValue(X);
2151   LValue VLValue = CGF.EmitLValue(V);
2152   RValue Res = XLValue.isGlobalReg()
2153                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
2154                    : CGF.EmitAtomicLoad(XLValue, Loc,
2155                                         IsSeqCst ? llvm::SequentiallyConsistent
2156                                                  : llvm::Monotonic,
2157                                         XLValue.isVolatile());
2158   // OpenMP, 2.12.6, atomic Construct
2159   // Any atomic construct with a seq_cst clause forces the atomically
2160   // performed operation to include an implicit flush operation without a
2161   // list.
2162   if (IsSeqCst)
2163     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2164   emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc);
2165 }
2166 
2167 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
2168                                    const Expr *X, const Expr *E,
2169                                    SourceLocation Loc) {
2170   // x = expr;
2171   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
2172   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
2173   // OpenMP, 2.12.6, atomic Construct
2174   // Any atomic construct with a seq_cst clause forces the atomically
2175   // performed operation to include an implicit flush operation without a
2176   // list.
2177   if (IsSeqCst)
2178     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2179 }
2180 
2181 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
2182                                                 RValue Update,
2183                                                 BinaryOperatorKind BO,
2184                                                 llvm::AtomicOrdering AO,
2185                                                 bool IsXLHSInRHSPart) {
2186   auto &Context = CGF.CGM.getContext();
2187   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
2188   // expression is simple and atomic is allowed for the given type for the
2189   // target platform.
2190   if (BO == BO_Comma || !Update.isScalar() ||
2191       !Update.getScalarVal()->getType()->isIntegerTy() ||
2192       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
2193                         (Update.getScalarVal()->getType() !=
2194                          X.getAddress().getElementType())) ||
2195       !X.getAddress().getElementType()->isIntegerTy() ||
2196       !Context.getTargetInfo().hasBuiltinAtomic(
2197           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
2198     return std::make_pair(false, RValue::get(nullptr));
2199 
2200   llvm::AtomicRMWInst::BinOp RMWOp;
2201   switch (BO) {
2202   case BO_Add:
2203     RMWOp = llvm::AtomicRMWInst::Add;
2204     break;
2205   case BO_Sub:
2206     if (!IsXLHSInRHSPart)
2207       return std::make_pair(false, RValue::get(nullptr));
2208     RMWOp = llvm::AtomicRMWInst::Sub;
2209     break;
2210   case BO_And:
2211     RMWOp = llvm::AtomicRMWInst::And;
2212     break;
2213   case BO_Or:
2214     RMWOp = llvm::AtomicRMWInst::Or;
2215     break;
2216   case BO_Xor:
2217     RMWOp = llvm::AtomicRMWInst::Xor;
2218     break;
2219   case BO_LT:
2220     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2221                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
2222                                    : llvm::AtomicRMWInst::Max)
2223                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
2224                                    : llvm::AtomicRMWInst::UMax);
2225     break;
2226   case BO_GT:
2227     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2228                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
2229                                    : llvm::AtomicRMWInst::Min)
2230                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
2231                                    : llvm::AtomicRMWInst::UMin);
2232     break;
2233   case BO_Assign:
2234     RMWOp = llvm::AtomicRMWInst::Xchg;
2235     break;
2236   case BO_Mul:
2237   case BO_Div:
2238   case BO_Rem:
2239   case BO_Shl:
2240   case BO_Shr:
2241   case BO_LAnd:
2242   case BO_LOr:
2243     return std::make_pair(false, RValue::get(nullptr));
2244   case BO_PtrMemD:
2245   case BO_PtrMemI:
2246   case BO_LE:
2247   case BO_GE:
2248   case BO_EQ:
2249   case BO_NE:
2250   case BO_AddAssign:
2251   case BO_SubAssign:
2252   case BO_AndAssign:
2253   case BO_OrAssign:
2254   case BO_XorAssign:
2255   case BO_MulAssign:
2256   case BO_DivAssign:
2257   case BO_RemAssign:
2258   case BO_ShlAssign:
2259   case BO_ShrAssign:
2260   case BO_Comma:
2261     llvm_unreachable("Unsupported atomic update operation");
2262   }
2263   auto *UpdateVal = Update.getScalarVal();
2264   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
2265     UpdateVal = CGF.Builder.CreateIntCast(
2266         IC, X.getAddress().getElementType(),
2267         X.getType()->hasSignedIntegerRepresentation());
2268   }
2269   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
2270   return std::make_pair(true, RValue::get(Res));
2271 }
2272 
2273 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
2274     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
2275     llvm::AtomicOrdering AO, SourceLocation Loc,
2276     const llvm::function_ref<RValue(RValue)> &CommonGen) {
2277   // Update expressions are allowed to have the following forms:
2278   // x binop= expr; -> xrval + expr;
2279   // x++, ++x -> xrval + 1;
2280   // x--, --x -> xrval - 1;
2281   // x = x binop expr; -> xrval binop expr
2282   // x = expr Op x; - > expr binop xrval;
2283   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
2284   if (!Res.first) {
2285     if (X.isGlobalReg()) {
2286       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
2287       // 'xrval'.
2288       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
2289     } else {
2290       // Perform compare-and-swap procedure.
2291       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
2292     }
2293   }
2294   return Res;
2295 }
2296 
2297 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
2298                                     const Expr *X, const Expr *E,
2299                                     const Expr *UE, bool IsXLHSInRHSPart,
2300                                     SourceLocation Loc) {
2301   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2302          "Update expr in 'atomic update' must be a binary operator.");
2303   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2304   // Update expressions are allowed to have the following forms:
2305   // x binop= expr; -> xrval + expr;
2306   // x++, ++x -> xrval + 1;
2307   // x--, --x -> xrval - 1;
2308   // x = x binop expr; -> xrval binop expr
2309   // x = expr Op x; - > expr binop xrval;
2310   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
2311   LValue XLValue = CGF.EmitLValue(X);
2312   RValue ExprRValue = CGF.EmitAnyExpr(E);
2313   auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
2314   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2315   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2316   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2317   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2318   auto Gen =
2319       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
2320         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2321         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2322         return CGF.EmitAnyExpr(UE);
2323       };
2324   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
2325       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2326   // OpenMP, 2.12.6, atomic Construct
2327   // Any atomic construct with a seq_cst clause forces the atomically
2328   // performed operation to include an implicit flush operation without a
2329   // list.
2330   if (IsSeqCst)
2331     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2332 }
2333 
2334 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
2335                             QualType SourceType, QualType ResType,
2336                             SourceLocation Loc) {
2337   switch (CGF.getEvaluationKind(ResType)) {
2338   case TEK_Scalar:
2339     return RValue::get(
2340         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
2341   case TEK_Complex: {
2342     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
2343     return RValue::getComplex(Res.first, Res.second);
2344   }
2345   case TEK_Aggregate:
2346     break;
2347   }
2348   llvm_unreachable("Must be a scalar or complex.");
2349 }
2350 
2351 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
2352                                      bool IsPostfixUpdate, const Expr *V,
2353                                      const Expr *X, const Expr *E,
2354                                      const Expr *UE, bool IsXLHSInRHSPart,
2355                                      SourceLocation Loc) {
2356   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
2357   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
2358   RValue NewVVal;
2359   LValue VLValue = CGF.EmitLValue(V);
2360   LValue XLValue = CGF.EmitLValue(X);
2361   RValue ExprRValue = CGF.EmitAnyExpr(E);
2362   auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
2363   QualType NewVValType;
2364   if (UE) {
2365     // 'x' is updated with some additional value.
2366     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2367            "Update expr in 'atomic capture' must be a binary operator.");
2368     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2369     // Update expressions are allowed to have the following forms:
2370     // x binop= expr; -> xrval + expr;
2371     // x++, ++x -> xrval + 1;
2372     // x--, --x -> xrval - 1;
2373     // x = x binop expr; -> xrval binop expr
2374     // x = expr Op x; - > expr binop xrval;
2375     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2376     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2377     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2378     NewVValType = XRValExpr->getType();
2379     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2380     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
2381                   IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
2382       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2383       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2384       RValue Res = CGF.EmitAnyExpr(UE);
2385       NewVVal = IsPostfixUpdate ? XRValue : Res;
2386       return Res;
2387     };
2388     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
2389         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2390     if (Res.first) {
2391       // 'atomicrmw' instruction was generated.
2392       if (IsPostfixUpdate) {
2393         // Use old value from 'atomicrmw'.
2394         NewVVal = Res.second;
2395       } else {
2396         // 'atomicrmw' does not provide new value, so evaluate it using old
2397         // value of 'x'.
2398         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2399         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
2400         NewVVal = CGF.EmitAnyExpr(UE);
2401       }
2402     }
2403   } else {
2404     // 'x' is simply rewritten with some 'expr'.
2405     NewVValType = X->getType().getNonReferenceType();
2406     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
2407                                X->getType().getNonReferenceType(), Loc);
2408     auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
2409       NewVVal = XRValue;
2410       return ExprRValue;
2411     };
2412     // Try to perform atomicrmw xchg, otherwise simple exchange.
2413     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
2414         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
2415         Loc, Gen);
2416     if (Res.first) {
2417       // 'atomicrmw' instruction was generated.
2418       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
2419     }
2420   }
2421   // Emit post-update store to 'v' of old/new 'x' value.
2422   emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc);
2423   // OpenMP, 2.12.6, atomic Construct
2424   // Any atomic construct with a seq_cst clause forces the atomically
2425   // performed operation to include an implicit flush operation without a
2426   // list.
2427   if (IsSeqCst)
2428     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2429 }
2430 
2431 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
2432                               bool IsSeqCst, bool IsPostfixUpdate,
2433                               const Expr *X, const Expr *V, const Expr *E,
2434                               const Expr *UE, bool IsXLHSInRHSPart,
2435                               SourceLocation Loc) {
2436   switch (Kind) {
2437   case OMPC_read:
2438     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
2439     break;
2440   case OMPC_write:
2441     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
2442     break;
2443   case OMPC_unknown:
2444   case OMPC_update:
2445     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
2446     break;
2447   case OMPC_capture:
2448     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
2449                              IsXLHSInRHSPart, Loc);
2450     break;
2451   case OMPC_if:
2452   case OMPC_final:
2453   case OMPC_num_threads:
2454   case OMPC_private:
2455   case OMPC_firstprivate:
2456   case OMPC_lastprivate:
2457   case OMPC_reduction:
2458   case OMPC_safelen:
2459   case OMPC_simdlen:
2460   case OMPC_collapse:
2461   case OMPC_default:
2462   case OMPC_seq_cst:
2463   case OMPC_shared:
2464   case OMPC_linear:
2465   case OMPC_aligned:
2466   case OMPC_copyin:
2467   case OMPC_copyprivate:
2468   case OMPC_flush:
2469   case OMPC_proc_bind:
2470   case OMPC_schedule:
2471   case OMPC_ordered:
2472   case OMPC_nowait:
2473   case OMPC_untied:
2474   case OMPC_threadprivate:
2475   case OMPC_depend:
2476   case OMPC_mergeable:
2477   case OMPC_device:
2478   case OMPC_threads:
2479   case OMPC_simd:
2480   case OMPC_map:
2481   case OMPC_num_teams:
2482   case OMPC_thread_limit:
2483   case OMPC_priority:
2484   case OMPC_grainsize:
2485   case OMPC_nogroup:
2486   case OMPC_num_tasks:
2487     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
2488   }
2489 }
2490 
2491 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
2492   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
2493   OpenMPClauseKind Kind = OMPC_unknown;
2494   for (auto *C : S.clauses()) {
2495     // Find first clause (skip seq_cst clause, if it is first).
2496     if (C->getClauseKind() != OMPC_seq_cst) {
2497       Kind = C->getClauseKind();
2498       break;
2499     }
2500   }
2501 
2502   const auto *CS =
2503       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
2504   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
2505     enterFullExpression(EWC);
2506   }
2507   // Processing for statements under 'atomic capture'.
2508   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
2509     for (const auto *C : Compound->body()) {
2510       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
2511         enterFullExpression(EWC);
2512       }
2513     }
2514   }
2515 
2516   LexicalScope Scope(*this, S.getSourceRange());
2517   auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
2518     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
2519                       S.getV(), S.getExpr(), S.getUpdateExpr(),
2520                       S.isXLHSInRHSPart(), S.getLocStart());
2521   };
2522   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
2523 }
2524 
2525 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
2526   LexicalScope Scope(*this, S.getSourceRange());
2527   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
2528 
2529   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2530   GenerateOpenMPCapturedVars(CS, CapturedVars);
2531 
2532   // Emit target region as a standalone region.
2533   auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
2534     CGF.EmitStmt(CS.getCapturedStmt());
2535   };
2536 
2537   // Obtain the target region outlined function.
2538   llvm::Value *Fn =
2539       CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, CodeGen);
2540 
2541   // Check if we have any if clause associated with the directive.
2542   const Expr *IfCond = nullptr;
2543 
2544   if (auto *C = S.getSingleClause<OMPIfClause>()) {
2545     IfCond = C->getCondition();
2546   }
2547 
2548   // Check if we have any device clause associated with the directive.
2549   const Expr *Device = nullptr;
2550   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
2551     Device = C->getDevice();
2552   }
2553 
2554   CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device,
2555                                         CapturedVars);
2556 }
2557 
2558 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
2559   llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
2560 }
2561 
2562 void CodeGenFunction::EmitOMPCancellationPointDirective(
2563     const OMPCancellationPointDirective &S) {
2564   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
2565                                                    S.getCancelRegion());
2566 }
2567 
2568 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
2569   const Expr *IfCond = nullptr;
2570   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2571     if (C->getNameModifier() == OMPD_unknown ||
2572         C->getNameModifier() == OMPD_cancel) {
2573       IfCond = C->getCondition();
2574       break;
2575     }
2576   }
2577   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
2578                                         S.getCancelRegion());
2579 }
2580 
2581 CodeGenFunction::JumpDest
2582 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
2583   if (Kind == OMPD_parallel || Kind == OMPD_task)
2584     return ReturnBlock;
2585   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
2586          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
2587   return BreakContinueStack.back().BreakBlock;
2588 }
2589 
2590 // Generate the instructions for '#pragma omp target data' directive.
2591 void CodeGenFunction::EmitOMPTargetDataDirective(
2592     const OMPTargetDataDirective &S) {
2593   // emit the code inside the construct for now
2594   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2595   CGM.getOpenMPRuntime().emitInlinedDirective(
2596       *this, OMPD_target_data,
2597       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
2598 }
2599 
2600 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
2601   // emit the code inside the construct for now
2602   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2603   CGM.getOpenMPRuntime().emitInlinedDirective(
2604       *this, OMPD_taskloop,
2605       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
2606 }
2607 
2608 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
2609     const OMPTaskLoopSimdDirective &S) {
2610   // emit the code inside the construct for now
2611   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2612   CGM.getOpenMPRuntime().emitInlinedDirective(
2613       *this, OMPD_taskloop_simd,
2614       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
2615 }
2616 
2617