1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit OpenMP nodes as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 using namespace clang;
22 using namespace CodeGen;
23 
24 namespace {
25 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
26 /// for captured expressions.
27 class OMPLexicalScope {
28   CodeGenFunction::LexicalScope Scope;
29   void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
30     for (const auto *C : S.clauses()) {
31       if (auto *CPI = OMPClauseWithPreInit::get(C)) {
32         if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
33           for (const auto *I : PreInit->decls())
34             CGF.EmitVarDecl(cast<VarDecl>(*I));
35         }
36       }
37     }
38   }
39 
40   class PostUpdateCleanup final : public EHScopeStack::Cleanup {
41     const OMPExecutableDirective &S;
42 
43   public:
44     PostUpdateCleanup(const OMPExecutableDirective &S) : S(S) {}
45 
46     void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
47       if (!CGF.HaveInsertPoint())
48         return;
49       (void)S;
50       // TODO: add cleanups for clauses that require post update.
51     }
52   };
53 
54 public:
55   OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
56       : Scope(CGF, S.getSourceRange()) {
57     emitPreInitStmt(CGF, S);
58     CGF.EHStack.pushCleanup<PostUpdateCleanup>(NormalAndEHCleanup, S);
59   }
60 };
61 } // namespace
62 
63 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
64   auto &C = getContext();
65   llvm::Value *Size = nullptr;
66   auto SizeInChars = C.getTypeSizeInChars(Ty);
67   if (SizeInChars.isZero()) {
68     // getTypeSizeInChars() returns 0 for a VLA.
69     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
70       llvm::Value *ArraySize;
71       std::tie(ArraySize, Ty) = getVLASize(VAT);
72       Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
73     }
74     SizeInChars = C.getTypeSizeInChars(Ty);
75     if (SizeInChars.isZero())
76       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
77     Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
78   } else
79     Size = CGM.getSize(SizeInChars);
80   return Size;
81 }
82 
83 void CodeGenFunction::GenerateOpenMPCapturedVars(
84     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
85   const RecordDecl *RD = S.getCapturedRecordDecl();
86   auto CurField = RD->field_begin();
87   auto CurCap = S.captures().begin();
88   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
89                                                  E = S.capture_init_end();
90        I != E; ++I, ++CurField, ++CurCap) {
91     if (CurField->hasCapturedVLAType()) {
92       auto VAT = CurField->getCapturedVLAType();
93       auto *Val = VLASizeMap[VAT->getSizeExpr()];
94       CapturedVars.push_back(Val);
95     } else if (CurCap->capturesThis())
96       CapturedVars.push_back(CXXThisValue);
97     else if (CurCap->capturesVariableByCopy())
98       CapturedVars.push_back(
99           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
100     else {
101       assert(CurCap->capturesVariable() && "Expected capture by reference.");
102       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
103     }
104   }
105 }
106 
107 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
108                                     StringRef Name, LValue AddrLV,
109                                     bool isReferenceType = false) {
110   ASTContext &Ctx = CGF.getContext();
111 
112   auto *CastedPtr = CGF.EmitScalarConversion(
113       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
114       Ctx.getPointerType(DstType), SourceLocation());
115   auto TmpAddr =
116       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
117           .getAddress();
118 
119   // If we are dealing with references we need to return the address of the
120   // reference instead of the reference of the value.
121   if (isReferenceType) {
122     QualType RefType = Ctx.getLValueReferenceType(DstType);
123     auto *RefVal = TmpAddr.getPointer();
124     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
125     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
126     CGF.EmitScalarInit(RefVal, TmpLVal);
127   }
128 
129   return TmpAddr;
130 }
131 
132 llvm::Function *
133 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
134   assert(
135       CapturedStmtInfo &&
136       "CapturedStmtInfo should be set when generating the captured function");
137   const CapturedDecl *CD = S.getCapturedDecl();
138   const RecordDecl *RD = S.getCapturedRecordDecl();
139   assert(CD->hasBody() && "missing CapturedDecl body");
140 
141   // Build the argument list.
142   ASTContext &Ctx = CGM.getContext();
143   FunctionArgList Args;
144   Args.append(CD->param_begin(),
145               std::next(CD->param_begin(), CD->getContextParamPosition()));
146   auto I = S.captures().begin();
147   for (auto *FD : RD->fields()) {
148     QualType ArgType = FD->getType();
149     IdentifierInfo *II = nullptr;
150     VarDecl *CapVar = nullptr;
151 
152     // If this is a capture by copy and the type is not a pointer, the outlined
153     // function argument type should be uintptr and the value properly casted to
154     // uintptr. This is necessary given that the runtime library is only able to
155     // deal with pointers. We can pass in the same way the VLA type sizes to the
156     // outlined function.
157     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
158         I->capturesVariableArrayType())
159       ArgType = Ctx.getUIntPtrType();
160 
161     if (I->capturesVariable() || I->capturesVariableByCopy()) {
162       CapVar = I->getCapturedVar();
163       II = CapVar->getIdentifier();
164     } else if (I->capturesThis())
165       II = &getContext().Idents.get("this");
166     else {
167       assert(I->capturesVariableArrayType());
168       II = &getContext().Idents.get("vla");
169     }
170     if (ArgType->isVariablyModifiedType())
171       ArgType = getContext().getVariableArrayDecayedType(ArgType);
172     Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
173                                              FD->getLocation(), II, ArgType));
174     ++I;
175   }
176   Args.append(
177       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
178       CD->param_end());
179 
180   // Create the function declaration.
181   FunctionType::ExtInfo ExtInfo;
182   const CGFunctionInfo &FuncInfo =
183       CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
184                                                     /*IsVariadic=*/false);
185   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
186 
187   llvm::Function *F = llvm::Function::Create(
188       FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
189       CapturedStmtInfo->getHelperName(), &CGM.getModule());
190   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
191   if (CD->isNothrow())
192     F->addFnAttr(llvm::Attribute::NoUnwind);
193 
194   // Generate the function.
195   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
196                 CD->getBody()->getLocStart());
197   unsigned Cnt = CD->getContextParamPosition();
198   I = S.captures().begin();
199   for (auto *FD : RD->fields()) {
200     // If we are capturing a pointer by copy we don't need to do anything, just
201     // use the value that we get from the arguments.
202     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
203       setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
204       ++Cnt;
205       ++I;
206       continue;
207     }
208 
209     LValue ArgLVal =
210         MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
211                        AlignmentSource::Decl);
212     if (FD->hasCapturedVLAType()) {
213       LValue CastedArgLVal =
214           MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
215                                               Args[Cnt]->getName(), ArgLVal),
216                          FD->getType(), AlignmentSource::Decl);
217       auto *ExprArg =
218           EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
219       auto VAT = FD->getCapturedVLAType();
220       VLASizeMap[VAT->getSizeExpr()] = ExprArg;
221     } else if (I->capturesVariable()) {
222       auto *Var = I->getCapturedVar();
223       QualType VarTy = Var->getType();
224       Address ArgAddr = ArgLVal.getAddress();
225       if (!VarTy->isReferenceType()) {
226         ArgAddr = EmitLoadOfReference(
227             ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
228       }
229       setAddrOfLocalVar(
230           Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
231     } else if (I->capturesVariableByCopy()) {
232       assert(!FD->getType()->isAnyPointerType() &&
233              "Not expecting a captured pointer.");
234       auto *Var = I->getCapturedVar();
235       QualType VarTy = Var->getType();
236       setAddrOfLocalVar(I->getCapturedVar(),
237                         castValueFromUintptr(*this, FD->getType(),
238                                              Args[Cnt]->getName(), ArgLVal,
239                                              VarTy->isReferenceType()));
240     } else {
241       // If 'this' is captured, load it into CXXThisValue.
242       assert(I->capturesThis());
243       CXXThisValue =
244           EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
245     }
246     ++Cnt;
247     ++I;
248   }
249 
250   PGO.assignRegionCounters(GlobalDecl(CD), F);
251   CapturedStmtInfo->EmitBody(*this, CD->getBody());
252   FinishFunction(CD->getBodyRBrace());
253 
254   return F;
255 }
256 
257 //===----------------------------------------------------------------------===//
258 //                              OpenMP Directive Emission
259 //===----------------------------------------------------------------------===//
260 void CodeGenFunction::EmitOMPAggregateAssign(
261     Address DestAddr, Address SrcAddr, QualType OriginalType,
262     const llvm::function_ref<void(Address, Address)> &CopyGen) {
263   // Perform element-by-element initialization.
264   QualType ElementTy;
265 
266   // Drill down to the base element type on both arrays.
267   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
268   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
269   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
270 
271   auto SrcBegin = SrcAddr.getPointer();
272   auto DestBegin = DestAddr.getPointer();
273   // Cast from pointer to array type to pointer to single element.
274   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
275   // The basic structure here is a while-do loop.
276   auto BodyBB = createBasicBlock("omp.arraycpy.body");
277   auto DoneBB = createBasicBlock("omp.arraycpy.done");
278   auto IsEmpty =
279       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
280   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
281 
282   // Enter the loop body, making that address the current address.
283   auto EntryBB = Builder.GetInsertBlock();
284   EmitBlock(BodyBB);
285 
286   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
287 
288   llvm::PHINode *SrcElementPHI =
289     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
290   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
291   Address SrcElementCurrent =
292       Address(SrcElementPHI,
293               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
294 
295   llvm::PHINode *DestElementPHI =
296     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
297   DestElementPHI->addIncoming(DestBegin, EntryBB);
298   Address DestElementCurrent =
299     Address(DestElementPHI,
300             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
301 
302   // Emit copy.
303   CopyGen(DestElementCurrent, SrcElementCurrent);
304 
305   // Shift the address forward by one element.
306   auto DestElementNext = Builder.CreateConstGEP1_32(
307       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
308   auto SrcElementNext = Builder.CreateConstGEP1_32(
309       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
310   // Check whether we've reached the end.
311   auto Done =
312       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
313   Builder.CreateCondBr(Done, DoneBB, BodyBB);
314   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
315   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
316 
317   // Done.
318   EmitBlock(DoneBB, /*IsFinished=*/true);
319 }
320 
321 /// \brief Emit initialization of arrays of complex types.
322 /// \param DestAddr Address of the array.
323 /// \param Type Type of array.
324 /// \param Init Initial expression of array.
325 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
326                                  QualType Type, const Expr *Init) {
327   // Perform element-by-element initialization.
328   QualType ElementTy;
329 
330   // Drill down to the base element type on both arrays.
331   auto ArrayTy = Type->getAsArrayTypeUnsafe();
332   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
333   DestAddr =
334       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
335 
336   auto DestBegin = DestAddr.getPointer();
337   // Cast from pointer to array type to pointer to single element.
338   auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
339   // The basic structure here is a while-do loop.
340   auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
341   auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
342   auto IsEmpty =
343       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
344   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
345 
346   // Enter the loop body, making that address the current address.
347   auto EntryBB = CGF.Builder.GetInsertBlock();
348   CGF.EmitBlock(BodyBB);
349 
350   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
351 
352   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
353       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
354   DestElementPHI->addIncoming(DestBegin, EntryBB);
355   Address DestElementCurrent =
356       Address(DestElementPHI,
357               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
358 
359   // Emit copy.
360   {
361     CodeGenFunction::RunCleanupsScope InitScope(CGF);
362     CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
363                          /*IsInitializer=*/false);
364   }
365 
366   // Shift the address forward by one element.
367   auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
368       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
369   // Check whether we've reached the end.
370   auto Done =
371       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
372   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
373   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
374 
375   // Done.
376   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
377 }
378 
379 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
380                                   Address SrcAddr, const VarDecl *DestVD,
381                                   const VarDecl *SrcVD, const Expr *Copy) {
382   if (OriginalType->isArrayType()) {
383     auto *BO = dyn_cast<BinaryOperator>(Copy);
384     if (BO && BO->getOpcode() == BO_Assign) {
385       // Perform simple memcpy for simple copying.
386       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
387     } else {
388       // For arrays with complex element types perform element by element
389       // copying.
390       EmitOMPAggregateAssign(
391           DestAddr, SrcAddr, OriginalType,
392           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
393             // Working with the single array element, so have to remap
394             // destination and source variables to corresponding array
395             // elements.
396             CodeGenFunction::OMPPrivateScope Remap(*this);
397             Remap.addPrivate(DestVD, [DestElement]() -> Address {
398               return DestElement;
399             });
400             Remap.addPrivate(
401                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
402             (void)Remap.Privatize();
403             EmitIgnoredExpr(Copy);
404           });
405     }
406   } else {
407     // Remap pseudo source variable to private copy.
408     CodeGenFunction::OMPPrivateScope Remap(*this);
409     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
410     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
411     (void)Remap.Privatize();
412     // Emit copying of the whole variable.
413     EmitIgnoredExpr(Copy);
414   }
415 }
416 
417 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
418                                                 OMPPrivateScope &PrivateScope) {
419   if (!HaveInsertPoint())
420     return false;
421   bool FirstprivateIsLastprivate = false;
422   llvm::DenseSet<const VarDecl *> Lastprivates;
423   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
424     for (const auto *D : C->varlists())
425       Lastprivates.insert(
426           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
427   }
428   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
429   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
430     auto IRef = C->varlist_begin();
431     auto InitsRef = C->inits().begin();
432     for (auto IInit : C->private_copies()) {
433       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
434       FirstprivateIsLastprivate =
435           FirstprivateIsLastprivate ||
436           (Lastprivates.count(OrigVD->getCanonicalDecl()) > 0);
437       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
438         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
439         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
440         bool IsRegistered;
441         DeclRefExpr DRE(
442             const_cast<VarDecl *>(OrigVD),
443             /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
444                 OrigVD) != nullptr,
445             (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
446         Address OriginalAddr = EmitLValue(&DRE).getAddress();
447         QualType Type = OrigVD->getType();
448         if (Type->isArrayType()) {
449           // Emit VarDecl with copy init for arrays.
450           // Get the address of the original variable captured in current
451           // captured region.
452           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
453             auto Emission = EmitAutoVarAlloca(*VD);
454             auto *Init = VD->getInit();
455             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
456               // Perform simple memcpy.
457               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
458                                   Type);
459             } else {
460               EmitOMPAggregateAssign(
461                   Emission.getAllocatedAddress(), OriginalAddr, Type,
462                   [this, VDInit, Init](Address DestElement,
463                                        Address SrcElement) {
464                     // Clean up any temporaries needed by the initialization.
465                     RunCleanupsScope InitScope(*this);
466                     // Emit initialization for single element.
467                     setAddrOfLocalVar(VDInit, SrcElement);
468                     EmitAnyExprToMem(Init, DestElement,
469                                      Init->getType().getQualifiers(),
470                                      /*IsInitializer*/ false);
471                     LocalDeclMap.erase(VDInit);
472                   });
473             }
474             EmitAutoVarCleanups(Emission);
475             return Emission.getAllocatedAddress();
476           });
477         } else {
478           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
479             // Emit private VarDecl with copy init.
480             // Remap temp VDInit variable to the address of the original
481             // variable
482             // (for proper handling of captured global variables).
483             setAddrOfLocalVar(VDInit, OriginalAddr);
484             EmitDecl(*VD);
485             LocalDeclMap.erase(VDInit);
486             return GetAddrOfLocalVar(VD);
487           });
488         }
489         assert(IsRegistered &&
490                "firstprivate var already registered as private");
491         // Silence the warning about unused variable.
492         (void)IsRegistered;
493       }
494       ++IRef;
495       ++InitsRef;
496     }
497   }
498   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
499 }
500 
501 void CodeGenFunction::EmitOMPPrivateClause(
502     const OMPExecutableDirective &D,
503     CodeGenFunction::OMPPrivateScope &PrivateScope) {
504   if (!HaveInsertPoint())
505     return;
506   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
507   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
508     auto IRef = C->varlist_begin();
509     for (auto IInit : C->private_copies()) {
510       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
511       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
512         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
513         bool IsRegistered =
514             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
515               // Emit private VarDecl with copy init.
516               EmitDecl(*VD);
517               return GetAddrOfLocalVar(VD);
518             });
519         assert(IsRegistered && "private var already registered as private");
520         // Silence the warning about unused variable.
521         (void)IsRegistered;
522       }
523       ++IRef;
524     }
525   }
526 }
527 
528 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
529   if (!HaveInsertPoint())
530     return false;
531   // threadprivate_var1 = master_threadprivate_var1;
532   // operator=(threadprivate_var2, master_threadprivate_var2);
533   // ...
534   // __kmpc_barrier(&loc, global_tid);
535   llvm::DenseSet<const VarDecl *> CopiedVars;
536   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
537   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
538     auto IRef = C->varlist_begin();
539     auto ISrcRef = C->source_exprs().begin();
540     auto IDestRef = C->destination_exprs().begin();
541     for (auto *AssignOp : C->assignment_ops()) {
542       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
543       QualType Type = VD->getType();
544       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
545         // Get the address of the master variable. If we are emitting code with
546         // TLS support, the address is passed from the master as field in the
547         // captured declaration.
548         Address MasterAddr = Address::invalid();
549         if (getLangOpts().OpenMPUseTLS &&
550             getContext().getTargetInfo().isTLSSupported()) {
551           assert(CapturedStmtInfo->lookup(VD) &&
552                  "Copyin threadprivates should have been captured!");
553           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
554                           VK_LValue, (*IRef)->getExprLoc());
555           MasterAddr = EmitLValue(&DRE).getAddress();
556           LocalDeclMap.erase(VD);
557         } else {
558           MasterAddr =
559             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
560                                         : CGM.GetAddrOfGlobal(VD),
561                     getContext().getDeclAlign(VD));
562         }
563         // Get the address of the threadprivate variable.
564         Address PrivateAddr = EmitLValue(*IRef).getAddress();
565         if (CopiedVars.size() == 1) {
566           // At first check if current thread is a master thread. If it is, no
567           // need to copy data.
568           CopyBegin = createBasicBlock("copyin.not.master");
569           CopyEnd = createBasicBlock("copyin.not.master.end");
570           Builder.CreateCondBr(
571               Builder.CreateICmpNE(
572                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
573                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
574               CopyBegin, CopyEnd);
575           EmitBlock(CopyBegin);
576         }
577         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
578         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
579         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
580       }
581       ++IRef;
582       ++ISrcRef;
583       ++IDestRef;
584     }
585   }
586   if (CopyEnd) {
587     // Exit out of copying procedure for non-master thread.
588     EmitBlock(CopyEnd, /*IsFinished=*/true);
589     return true;
590   }
591   return false;
592 }
593 
594 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
595     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
596   if (!HaveInsertPoint())
597     return false;
598   bool HasAtLeastOneLastprivate = false;
599   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
600   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
601     HasAtLeastOneLastprivate = true;
602     auto IRef = C->varlist_begin();
603     auto IDestRef = C->destination_exprs().begin();
604     for (auto *IInit : C->private_copies()) {
605       // Keep the address of the original variable for future update at the end
606       // of the loop.
607       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
608       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
609         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
610         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
611           DeclRefExpr DRE(
612               const_cast<VarDecl *>(OrigVD),
613               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
614                   OrigVD) != nullptr,
615               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
616           return EmitLValue(&DRE).getAddress();
617         });
618         // Check if the variable is also a firstprivate: in this case IInit is
619         // not generated. Initialization of this variable will happen in codegen
620         // for 'firstprivate' clause.
621         if (IInit) {
622           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
623           bool IsRegistered =
624               PrivateScope.addPrivate(OrigVD, [&]() -> Address {
625                 // Emit private VarDecl with copy init.
626                 EmitDecl(*VD);
627                 return GetAddrOfLocalVar(VD);
628               });
629           assert(IsRegistered &&
630                  "lastprivate var already registered as private");
631           (void)IsRegistered;
632         }
633       }
634       ++IRef;
635       ++IDestRef;
636     }
637   }
638   return HasAtLeastOneLastprivate;
639 }
640 
641 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
642     const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
643   if (!HaveInsertPoint())
644     return;
645   // Emit following code:
646   // if (<IsLastIterCond>) {
647   //   orig_var1 = private_orig_var1;
648   //   ...
649   //   orig_varn = private_orig_varn;
650   // }
651   llvm::BasicBlock *ThenBB = nullptr;
652   llvm::BasicBlock *DoneBB = nullptr;
653   if (IsLastIterCond) {
654     ThenBB = createBasicBlock(".omp.lastprivate.then");
655     DoneBB = createBasicBlock(".omp.lastprivate.done");
656     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
657     EmitBlock(ThenBB);
658   }
659   llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
660   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
661     auto IC = LoopDirective->counters().begin();
662     for (auto F : LoopDirective->finals()) {
663       auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl();
664       LoopCountersAndUpdates[D] = F;
665       ++IC;
666     }
667   }
668   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
669   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
670     auto IRef = C->varlist_begin();
671     auto ISrcRef = C->source_exprs().begin();
672     auto IDestRef = C->destination_exprs().begin();
673     for (auto *AssignOp : C->assignment_ops()) {
674       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
675       QualType Type = PrivateVD->getType();
676       auto *CanonicalVD = PrivateVD->getCanonicalDecl();
677       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
678         // If lastprivate variable is a loop control variable for loop-based
679         // directive, update its value before copyin back to original
680         // variable.
681         if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
682           EmitIgnoredExpr(UpExpr);
683         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
684         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
685         // Get the address of the original variable.
686         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
687         // Get the address of the private variable.
688         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
689         if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
690           PrivateAddr =
691               Address(Builder.CreateLoad(PrivateAddr),
692                       getNaturalTypeAlignment(RefTy->getPointeeType()));
693         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
694       }
695       ++IRef;
696       ++ISrcRef;
697       ++IDestRef;
698     }
699   }
700   if (IsLastIterCond)
701     EmitBlock(DoneBB, /*IsFinished=*/true);
702 }
703 
704 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
705                           LValue BaseLV, llvm::Value *Addr) {
706   Address Tmp = Address::invalid();
707   Address TopTmp = Address::invalid();
708   Address MostTopTmp = Address::invalid();
709   BaseTy = BaseTy.getNonReferenceType();
710   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
711          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
712     Tmp = CGF.CreateMemTemp(BaseTy);
713     if (TopTmp.isValid())
714       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
715     else
716       MostTopTmp = Tmp;
717     TopTmp = Tmp;
718     BaseTy = BaseTy->getPointeeType();
719   }
720   llvm::Type *Ty = BaseLV.getPointer()->getType();
721   if (Tmp.isValid())
722     Ty = Tmp.getElementType();
723   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
724   if (Tmp.isValid()) {
725     CGF.Builder.CreateStore(Addr, Tmp);
726     return MostTopTmp;
727   }
728   return Address(Addr, BaseLV.getAlignment());
729 }
730 
731 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
732                           LValue BaseLV) {
733   BaseTy = BaseTy.getNonReferenceType();
734   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
735          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
736     if (auto *PtrTy = BaseTy->getAs<PointerType>())
737       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
738     else {
739       BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
740                                              BaseTy->castAs<ReferenceType>());
741     }
742     BaseTy = BaseTy->getPointeeType();
743   }
744   return CGF.MakeAddrLValue(
745       Address(
746           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
747               BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
748           BaseLV.getAlignment()),
749       BaseLV.getType(), BaseLV.getAlignmentSource());
750 }
751 
752 void CodeGenFunction::EmitOMPReductionClauseInit(
753     const OMPExecutableDirective &D,
754     CodeGenFunction::OMPPrivateScope &PrivateScope) {
755   if (!HaveInsertPoint())
756     return;
757   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
758     auto ILHS = C->lhs_exprs().begin();
759     auto IRHS = C->rhs_exprs().begin();
760     auto IPriv = C->privates().begin();
761     for (auto IRef : C->varlists()) {
762       auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
763       auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
764       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
765       if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
766         auto *Base = OASE->getBase()->IgnoreParenImpCasts();
767         while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
768           Base = TempOASE->getBase()->IgnoreParenImpCasts();
769         while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
770           Base = TempASE->getBase()->IgnoreParenImpCasts();
771         auto *DE = cast<DeclRefExpr>(Base);
772         auto *OrigVD = cast<VarDecl>(DE->getDecl());
773         auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
774         auto OASELValueUB =
775             EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
776         auto OriginalBaseLValue = EmitLValue(DE);
777         LValue BaseLValue =
778             loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
779                         OriginalBaseLValue);
780         // Store the address of the original variable associated with the LHS
781         // implicit variable.
782         PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
783           return OASELValueLB.getAddress();
784         });
785         // Emit reduction copy.
786         bool IsRegistered = PrivateScope.addPrivate(
787             OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
788                      OASELValueUB, OriginalBaseLValue]() -> Address {
789               // Emit VarDecl with copy init for arrays.
790               // Get the address of the original variable captured in current
791               // captured region.
792               auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
793                                                  OASELValueLB.getPointer());
794               Size = Builder.CreateNUWAdd(
795                   Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
796               CodeGenFunction::OpaqueValueMapping OpaqueMap(
797                   *this, cast<OpaqueValueExpr>(
798                              getContext()
799                                  .getAsVariableArrayType(PrivateVD->getType())
800                                  ->getSizeExpr()),
801                   RValue::get(Size));
802               EmitVariablyModifiedType(PrivateVD->getType());
803               auto Emission = EmitAutoVarAlloca(*PrivateVD);
804               auto Addr = Emission.getAllocatedAddress();
805               auto *Init = PrivateVD->getInit();
806               EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
807               EmitAutoVarCleanups(Emission);
808               // Emit private VarDecl with reduction init.
809               auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
810                                                    OASELValueLB.getPointer());
811               auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
812               return castToBase(*this, OrigVD->getType(),
813                                 OASELValueLB.getType(), OriginalBaseLValue,
814                                 Ptr);
815             });
816         assert(IsRegistered && "private var already registered as private");
817         // Silence the warning about unused variable.
818         (void)IsRegistered;
819         PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
820           return GetAddrOfLocalVar(PrivateVD);
821         });
822       } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
823         auto *Base = ASE->getBase()->IgnoreParenImpCasts();
824         while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
825           Base = TempASE->getBase()->IgnoreParenImpCasts();
826         auto *DE = cast<DeclRefExpr>(Base);
827         auto *OrigVD = cast<VarDecl>(DE->getDecl());
828         auto ASELValue = EmitLValue(ASE);
829         auto OriginalBaseLValue = EmitLValue(DE);
830         LValue BaseLValue = loadToBegin(
831             *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
832         // Store the address of the original variable associated with the LHS
833         // implicit variable.
834         PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
835           return ASELValue.getAddress();
836         });
837         // Emit reduction copy.
838         bool IsRegistered = PrivateScope.addPrivate(
839             OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
840                      OriginalBaseLValue]() -> Address {
841               // Emit private VarDecl with reduction init.
842               EmitDecl(*PrivateVD);
843               auto Addr = GetAddrOfLocalVar(PrivateVD);
844               auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
845                                                    ASELValue.getPointer());
846               auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
847               return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
848                                 OriginalBaseLValue, Ptr);
849             });
850         assert(IsRegistered && "private var already registered as private");
851         // Silence the warning about unused variable.
852         (void)IsRegistered;
853         PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
854           return Builder.CreateElementBitCast(
855               GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
856               "rhs.begin");
857         });
858       } else {
859         auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
860         QualType Type = PrivateVD->getType();
861         if (getContext().getAsArrayType(Type)) {
862           // Store the address of the original variable associated with the LHS
863           // implicit variable.
864           DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
865                           CapturedStmtInfo->lookup(OrigVD) != nullptr,
866                           IRef->getType(), VK_LValue, IRef->getExprLoc());
867           Address OriginalAddr = EmitLValue(&DRE).getAddress();
868           PrivateScope.addPrivate(LHSVD, [this, OriginalAddr,
869                                           LHSVD]() -> Address {
870             return Builder.CreateElementBitCast(
871                 OriginalAddr, ConvertTypeForMem(LHSVD->getType()),
872                 "lhs.begin");
873           });
874           bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
875             if (Type->isVariablyModifiedType()) {
876               CodeGenFunction::OpaqueValueMapping OpaqueMap(
877                   *this, cast<OpaqueValueExpr>(
878                              getContext()
879                                  .getAsVariableArrayType(PrivateVD->getType())
880                                  ->getSizeExpr()),
881                   RValue::get(
882                       getTypeSize(OrigVD->getType().getNonReferenceType())));
883               EmitVariablyModifiedType(Type);
884             }
885             auto Emission = EmitAutoVarAlloca(*PrivateVD);
886             auto Addr = Emission.getAllocatedAddress();
887             auto *Init = PrivateVD->getInit();
888             EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
889             EmitAutoVarCleanups(Emission);
890             return Emission.getAllocatedAddress();
891           });
892           assert(IsRegistered && "private var already registered as private");
893           // Silence the warning about unused variable.
894           (void)IsRegistered;
895           PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
896             return Builder.CreateElementBitCast(
897                 GetAddrOfLocalVar(PrivateVD),
898                 ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
899           });
900         } else {
901           // Store the address of the original variable associated with the LHS
902           // implicit variable.
903           PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
904             DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
905                             CapturedStmtInfo->lookup(OrigVD) != nullptr,
906                             IRef->getType(), VK_LValue, IRef->getExprLoc());
907             return EmitLValue(&DRE).getAddress();
908           });
909           // Emit reduction copy.
910           bool IsRegistered =
911               PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
912                 // Emit private VarDecl with reduction init.
913                 EmitDecl(*PrivateVD);
914                 return GetAddrOfLocalVar(PrivateVD);
915               });
916           assert(IsRegistered && "private var already registered as private");
917           // Silence the warning about unused variable.
918           (void)IsRegistered;
919           PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
920             return GetAddrOfLocalVar(PrivateVD);
921           });
922         }
923       }
924       ++ILHS;
925       ++IRHS;
926       ++IPriv;
927     }
928   }
929 }
930 
931 void CodeGenFunction::EmitOMPReductionClauseFinal(
932     const OMPExecutableDirective &D) {
933   if (!HaveInsertPoint())
934     return;
935   llvm::SmallVector<const Expr *, 8> Privates;
936   llvm::SmallVector<const Expr *, 8> LHSExprs;
937   llvm::SmallVector<const Expr *, 8> RHSExprs;
938   llvm::SmallVector<const Expr *, 8> ReductionOps;
939   bool HasAtLeastOneReduction = false;
940   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
941     HasAtLeastOneReduction = true;
942     Privates.append(C->privates().begin(), C->privates().end());
943     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
944     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
945     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
946   }
947   if (HasAtLeastOneReduction) {
948     // Emit nowait reduction if nowait clause is present or directive is a
949     // parallel directive (it always has implicit barrier).
950     CGM.getOpenMPRuntime().emitReduction(
951         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
952         D.getSingleClause<OMPNowaitClause>() ||
953             isOpenMPParallelDirective(D.getDirectiveKind()) ||
954             D.getDirectiveKind() == OMPD_simd,
955         D.getDirectiveKind() == OMPD_simd);
956   }
957 }
958 
959 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
960                                            const OMPExecutableDirective &S,
961                                            OpenMPDirectiveKind InnermostKind,
962                                            const RegionCodeGenTy &CodeGen) {
963   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
964   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
965   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
966   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
967       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
968   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
969     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
970     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
971                                          /*IgnoreResultAssign*/ true);
972     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
973         CGF, NumThreads, NumThreadsClause->getLocStart());
974   }
975   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
976     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
977     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
978         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
979   }
980   const Expr *IfCond = nullptr;
981   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
982     if (C->getNameModifier() == OMPD_unknown ||
983         C->getNameModifier() == OMPD_parallel) {
984       IfCond = C->getCondition();
985       break;
986     }
987   }
988   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
989                                               CapturedVars, IfCond);
990 }
991 
992 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
993   OMPLexicalScope Scope(*this, S);
994   // Emit parallel region as a standalone region.
995   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
996     OMPPrivateScope PrivateScope(CGF);
997     bool Copyins = CGF.EmitOMPCopyinClause(S);
998     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
999     if (Copyins) {
1000       // Emit implicit barrier to synchronize threads and avoid data races on
1001       // propagation master's thread values of threadprivate variables to local
1002       // instances of that variables of all other implicit threads.
1003       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1004           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1005           /*ForceSimpleCall=*/true);
1006     }
1007     CGF.EmitOMPPrivateClause(S, PrivateScope);
1008     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1009     (void)PrivateScope.Privatize();
1010     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1011     CGF.EmitOMPReductionClauseFinal(S);
1012   };
1013   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
1014 }
1015 
1016 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1017                                       JumpDest LoopExit) {
1018   RunCleanupsScope BodyScope(*this);
1019   // Update counters values on current iteration.
1020   for (auto I : D.updates()) {
1021     EmitIgnoredExpr(I);
1022   }
1023   // Update the linear variables.
1024   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1025     for (auto U : C->updates()) {
1026       EmitIgnoredExpr(U);
1027     }
1028   }
1029 
1030   // On a continue in the body, jump to the end.
1031   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1032   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1033   // Emit loop body.
1034   EmitStmt(D.getBody());
1035   // The end (updates/cleanups).
1036   EmitBlock(Continue.getBlock());
1037   BreakContinueStack.pop_back();
1038 }
1039 
1040 void CodeGenFunction::EmitOMPInnerLoop(
1041     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1042     const Expr *IncExpr,
1043     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1044     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1045   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1046 
1047   // Start the loop with a block that tests the condition.
1048   auto CondBlock = createBasicBlock("omp.inner.for.cond");
1049   EmitBlock(CondBlock);
1050   LoopStack.push(CondBlock);
1051 
1052   // If there are any cleanups between here and the loop-exit scope,
1053   // create a block to stage a loop exit along.
1054   auto ExitBlock = LoopExit.getBlock();
1055   if (RequiresCleanup)
1056     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1057 
1058   auto LoopBody = createBasicBlock("omp.inner.for.body");
1059 
1060   // Emit condition.
1061   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1062   if (ExitBlock != LoopExit.getBlock()) {
1063     EmitBlock(ExitBlock);
1064     EmitBranchThroughCleanup(LoopExit);
1065   }
1066 
1067   EmitBlock(LoopBody);
1068   incrementProfileCounter(&S);
1069 
1070   // Create a block for the increment.
1071   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1072   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1073 
1074   BodyGen(*this);
1075 
1076   // Emit "IV = IV + 1" and a back-edge to the condition block.
1077   EmitBlock(Continue.getBlock());
1078   EmitIgnoredExpr(IncExpr);
1079   PostIncGen(*this);
1080   BreakContinueStack.pop_back();
1081   EmitBranch(CondBlock);
1082   LoopStack.pop();
1083   // Emit the fall-through block.
1084   EmitBlock(LoopExit.getBlock());
1085 }
1086 
1087 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1088   if (!HaveInsertPoint())
1089     return;
1090   // Emit inits for the linear variables.
1091   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1092     for (auto Init : C->inits()) {
1093       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1094       auto *OrigVD = cast<VarDecl>(
1095           cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
1096       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1097                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1098                       VD->getInit()->getType(), VK_LValue,
1099                       VD->getInit()->getExprLoc());
1100       AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1101       EmitExprAsInit(&DRE, VD,
1102                MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
1103                      /*capturedByInit=*/false);
1104       EmitAutoVarCleanups(Emission);
1105     }
1106     // Emit the linear steps for the linear clauses.
1107     // If a step is not constant, it is pre-calculated before the loop.
1108     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1109       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1110         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1111         // Emit calculation of the linear step.
1112         EmitIgnoredExpr(CS);
1113       }
1114   }
1115 }
1116 
1117 static void emitLinearClauseFinal(CodeGenFunction &CGF,
1118                                   const OMPLoopDirective &D) {
1119   if (!CGF.HaveInsertPoint())
1120     return;
1121   // Emit the final values of the linear variables.
1122   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1123     auto IC = C->varlist_begin();
1124     for (auto F : C->finals()) {
1125       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1126       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1127                       CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
1128                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1129       Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
1130       CodeGenFunction::OMPPrivateScope VarScope(CGF);
1131       VarScope.addPrivate(OrigVD,
1132                           [OrigAddr]() -> Address { return OrigAddr; });
1133       (void)VarScope.Privatize();
1134       CGF.EmitIgnoredExpr(F);
1135       ++IC;
1136     }
1137   }
1138 }
1139 
1140 static void emitAlignedClause(CodeGenFunction &CGF,
1141                               const OMPExecutableDirective &D) {
1142   if (!CGF.HaveInsertPoint())
1143     return;
1144   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1145     unsigned ClauseAlignment = 0;
1146     if (auto AlignmentExpr = Clause->getAlignment()) {
1147       auto AlignmentCI =
1148           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1149       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1150     }
1151     for (auto E : Clause->varlists()) {
1152       unsigned Alignment = ClauseAlignment;
1153       if (Alignment == 0) {
1154         // OpenMP [2.8.1, Description]
1155         // If no optional parameter is specified, implementation-defined default
1156         // alignments for SIMD instructions on the target platforms are assumed.
1157         Alignment =
1158             CGF.getContext()
1159                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1160                     E->getType()->getPointeeType()))
1161                 .getQuantity();
1162       }
1163       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1164              "alignment is not power of 2");
1165       if (Alignment != 0) {
1166         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1167         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1168       }
1169     }
1170   }
1171 }
1172 
1173 static void emitPrivateLoopCounters(CodeGenFunction &CGF,
1174                                     CodeGenFunction::OMPPrivateScope &LoopScope,
1175                                     ArrayRef<Expr *> Counters,
1176                                     ArrayRef<Expr *> PrivateCounters) {
1177   if (!CGF.HaveInsertPoint())
1178     return;
1179   auto I = PrivateCounters.begin();
1180   for (auto *E : Counters) {
1181     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1182     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1183     Address Addr = Address::invalid();
1184     (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1185       // Emit var without initialization.
1186       auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
1187       CGF.EmitAutoVarCleanups(VarEmission);
1188       Addr = VarEmission.getAllocatedAddress();
1189       return Addr;
1190     });
1191     (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
1192     ++I;
1193   }
1194 }
1195 
1196 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1197                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
1198                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1199   if (!CGF.HaveInsertPoint())
1200     return;
1201   {
1202     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1203     emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
1204                             S.private_counters());
1205     (void)PreCondScope.Privatize();
1206     // Get initial values of real counters.
1207     for (auto I : S.inits()) {
1208       CGF.EmitIgnoredExpr(I);
1209     }
1210   }
1211   // Check that loop is executed at least one time.
1212   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1213 }
1214 
1215 static void
1216 emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
1217                       CodeGenFunction::OMPPrivateScope &PrivateScope) {
1218   if (!CGF.HaveInsertPoint())
1219     return;
1220   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1221     auto CurPrivate = C->privates().begin();
1222     for (auto *E : C->varlists()) {
1223       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1224       auto *PrivateVD =
1225           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1226       bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1227         // Emit private VarDecl with copy init.
1228         CGF.EmitVarDecl(*PrivateVD);
1229         return CGF.GetAddrOfLocalVar(PrivateVD);
1230       });
1231       assert(IsRegistered && "linear var already registered as private");
1232       // Silence the warning about unused variable.
1233       (void)IsRegistered;
1234       ++CurPrivate;
1235     }
1236   }
1237 }
1238 
1239 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1240                                      const OMPExecutableDirective &D,
1241                                      bool IsMonotonic) {
1242   if (!CGF.HaveInsertPoint())
1243     return;
1244   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1245     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1246                                  /*ignoreResult=*/true);
1247     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1248     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1249     // In presence of finite 'safelen', it may be unsafe to mark all
1250     // the memory instructions parallel, because loop-carried
1251     // dependences of 'safelen' iterations are possible.
1252     if (!IsMonotonic)
1253       CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1254   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1255     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1256                                  /*ignoreResult=*/true);
1257     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1258     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1259     // In presence of finite 'safelen', it may be unsafe to mark all
1260     // the memory instructions parallel, because loop-carried
1261     // dependences of 'safelen' iterations are possible.
1262     CGF.LoopStack.setParallel(false);
1263   }
1264 }
1265 
1266 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1267                                       bool IsMonotonic) {
1268   // Walk clauses and process safelen/lastprivate.
1269   LoopStack.setParallel(!IsMonotonic);
1270   LoopStack.setVectorizeEnable(true);
1271   emitSimdlenSafelenClause(*this, D, IsMonotonic);
1272 }
1273 
1274 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
1275   if (!HaveInsertPoint())
1276     return;
1277   auto IC = D.counters().begin();
1278   for (auto F : D.finals()) {
1279     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1280     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
1281       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1282                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
1283                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1284       Address OrigAddr = EmitLValue(&DRE).getAddress();
1285       OMPPrivateScope VarScope(*this);
1286       VarScope.addPrivate(OrigVD,
1287                           [OrigAddr]() -> Address { return OrigAddr; });
1288       (void)VarScope.Privatize();
1289       EmitIgnoredExpr(F);
1290     }
1291     ++IC;
1292   }
1293   emitLinearClauseFinal(*this, D);
1294 }
1295 
1296 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1297   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1298     // if (PreCond) {
1299     //   for (IV in 0..LastIteration) BODY;
1300     //   <Final counter/linear vars updates>;
1301     // }
1302     //
1303 
1304     // Emit: if (PreCond) - begin.
1305     // If the condition constant folds and can be elided, avoid emitting the
1306     // whole loop.
1307     bool CondConstant;
1308     llvm::BasicBlock *ContBlock = nullptr;
1309     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1310       if (!CondConstant)
1311         return;
1312     } else {
1313       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1314       ContBlock = CGF.createBasicBlock("simd.if.end");
1315       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1316                   CGF.getProfileCount(&S));
1317       CGF.EmitBlock(ThenBlock);
1318       CGF.incrementProfileCounter(&S);
1319     }
1320 
1321     // Emit the loop iteration variable.
1322     const Expr *IVExpr = S.getIterationVariable();
1323     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1324     CGF.EmitVarDecl(*IVDecl);
1325     CGF.EmitIgnoredExpr(S.getInit());
1326 
1327     // Emit the iterations count variable.
1328     // If it is not a variable, Sema decided to calculate iterations count on
1329     // each iteration (e.g., it is foldable into a constant).
1330     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1331       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1332       // Emit calculation of the iterations count.
1333       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1334     }
1335 
1336     CGF.EmitOMPSimdInit(S);
1337 
1338     emitAlignedClause(CGF, S);
1339     CGF.EmitOMPLinearClauseInit(S);
1340     bool HasLastprivateClause;
1341     {
1342       OMPPrivateScope LoopScope(CGF);
1343       emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
1344                               S.private_counters());
1345       emitPrivateLinearVars(CGF, S, LoopScope);
1346       CGF.EmitOMPPrivateClause(S, LoopScope);
1347       CGF.EmitOMPReductionClauseInit(S, LoopScope);
1348       HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1349       (void)LoopScope.Privatize();
1350       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1351                            S.getInc(),
1352                            [&S](CodeGenFunction &CGF) {
1353                              CGF.EmitOMPLoopBody(S, JumpDest());
1354                              CGF.EmitStopPoint(&S);
1355                            },
1356                            [](CodeGenFunction &) {});
1357       // Emit final copy of the lastprivate variables at the end of loops.
1358       if (HasLastprivateClause) {
1359         CGF.EmitOMPLastprivateClauseFinal(S);
1360       }
1361       CGF.EmitOMPReductionClauseFinal(S);
1362     }
1363     CGF.EmitOMPSimdFinal(S);
1364     // Emit: if (PreCond) - end.
1365     if (ContBlock) {
1366       CGF.EmitBranch(ContBlock);
1367       CGF.EmitBlock(ContBlock, true);
1368     }
1369   };
1370   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1371 }
1372 
1373 void CodeGenFunction::EmitOMPForOuterLoop(
1374     OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic,
1375     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1376     Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
1377   auto &RT = CGM.getOpenMPRuntime();
1378 
1379   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1380   const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
1381 
1382   assert((Ordered ||
1383           !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
1384          "static non-chunked schedule does not need outer loop");
1385 
1386   // Emit outer loop.
1387   //
1388   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1389   // When schedule(dynamic,chunk_size) is specified, the iterations are
1390   // distributed to threads in the team in chunks as the threads request them.
1391   // Each thread executes a chunk of iterations, then requests another chunk,
1392   // until no chunks remain to be distributed. Each chunk contains chunk_size
1393   // iterations, except for the last chunk to be distributed, which may have
1394   // fewer iterations. When no chunk_size is specified, it defaults to 1.
1395   //
1396   // When schedule(guided,chunk_size) is specified, the iterations are assigned
1397   // to threads in the team in chunks as the executing threads request them.
1398   // Each thread executes a chunk of iterations, then requests another chunk,
1399   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
1400   // each chunk is proportional to the number of unassigned iterations divided
1401   // by the number of threads in the team, decreasing to 1. For a chunk_size
1402   // with value k (greater than 1), the size of each chunk is determined in the
1403   // same way, with the restriction that the chunks do not contain fewer than k
1404   // iterations (except for the last chunk to be assigned, which may have fewer
1405   // than k iterations).
1406   //
1407   // When schedule(auto) is specified, the decision regarding scheduling is
1408   // delegated to the compiler and/or runtime system. The programmer gives the
1409   // implementation the freedom to choose any possible mapping of iterations to
1410   // threads in the team.
1411   //
1412   // When schedule(runtime) is specified, the decision regarding scheduling is
1413   // deferred until run time, and the schedule and chunk size are taken from the
1414   // run-sched-var ICV. If the ICV is set to auto, the schedule is
1415   // implementation defined
1416   //
1417   // while(__kmpc_dispatch_next(&LB, &UB)) {
1418   //   idx = LB;
1419   //   while (idx <= UB) { BODY; ++idx;
1420   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
1421   //   } // inner loop
1422   // }
1423   //
1424   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1425   // When schedule(static, chunk_size) is specified, iterations are divided into
1426   // chunks of size chunk_size, and the chunks are assigned to the threads in
1427   // the team in a round-robin fashion in the order of the thread number.
1428   //
1429   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
1430   //   while (idx <= UB) { BODY; ++idx; } // inner loop
1431   //   LB = LB + ST;
1432   //   UB = UB + ST;
1433   // }
1434   //
1435 
1436   const Expr *IVExpr = S.getIterationVariable();
1437   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1438   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1439 
1440   if (DynamicOrOrdered) {
1441     llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
1442     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
1443                            IVSize, IVSigned, Ordered, UBVal, Chunk);
1444   } else {
1445     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
1446                          IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
1447   }
1448 
1449   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1450 
1451   // Start the loop with a block that tests the condition.
1452   auto CondBlock = createBasicBlock("omp.dispatch.cond");
1453   EmitBlock(CondBlock);
1454   LoopStack.push(CondBlock);
1455 
1456   llvm::Value *BoolCondVal = nullptr;
1457   if (!DynamicOrOrdered) {
1458     // UB = min(UB, GlobalUB)
1459     EmitIgnoredExpr(S.getEnsureUpperBound());
1460     // IV = LB
1461     EmitIgnoredExpr(S.getInit());
1462     // IV < UB
1463     BoolCondVal = EvaluateExprAsBool(S.getCond());
1464   } else {
1465     BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
1466                                     IL, LB, UB, ST);
1467   }
1468 
1469   // If there are any cleanups between here and the loop-exit scope,
1470   // create a block to stage a loop exit along.
1471   auto ExitBlock = LoopExit.getBlock();
1472   if (LoopScope.requiresCleanups())
1473     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1474 
1475   auto LoopBody = createBasicBlock("omp.dispatch.body");
1476   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1477   if (ExitBlock != LoopExit.getBlock()) {
1478     EmitBlock(ExitBlock);
1479     EmitBranchThroughCleanup(LoopExit);
1480   }
1481   EmitBlock(LoopBody);
1482 
1483   // Emit "IV = LB" (in case of static schedule, we have already calculated new
1484   // LB for loop condition and emitted it above).
1485   if (DynamicOrOrdered)
1486     EmitIgnoredExpr(S.getInit());
1487 
1488   // Create a block for the increment.
1489   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1490   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1491 
1492   // Generate !llvm.loop.parallel metadata for loads and stores for loops
1493   // with dynamic/guided scheduling and without ordered clause.
1494   if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1495     LoopStack.setParallel(!IsMonotonic);
1496   else
1497     EmitOMPSimdInit(S, IsMonotonic);
1498 
1499   SourceLocation Loc = S.getLocStart();
1500   EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
1501                    [&S, LoopExit](CodeGenFunction &CGF) {
1502                      CGF.EmitOMPLoopBody(S, LoopExit);
1503                      CGF.EmitStopPoint(&S);
1504                    },
1505                    [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
1506                      if (Ordered) {
1507                        CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
1508                            CGF, Loc, IVSize, IVSigned);
1509                      }
1510                    });
1511 
1512   EmitBlock(Continue.getBlock());
1513   BreakContinueStack.pop_back();
1514   if (!DynamicOrOrdered) {
1515     // Emit "LB = LB + Stride", "UB = UB + Stride".
1516     EmitIgnoredExpr(S.getNextLowerBound());
1517     EmitIgnoredExpr(S.getNextUpperBound());
1518   }
1519 
1520   EmitBranch(CondBlock);
1521   LoopStack.pop();
1522   // Emit the fall-through block.
1523   EmitBlock(LoopExit.getBlock());
1524 
1525   // Tell the runtime we are done.
1526   if (!DynamicOrOrdered)
1527     RT.emitForStaticFinish(*this, S.getLocEnd());
1528 }
1529 
1530 /// \brief Emit a helper variable and return corresponding lvalue.
1531 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1532                                const DeclRefExpr *Helper) {
1533   auto VDecl = cast<VarDecl>(Helper->getDecl());
1534   CGF.EmitVarDecl(*VDecl);
1535   return CGF.EmitLValue(Helper);
1536 }
1537 
1538 namespace {
1539   struct ScheduleKindModifiersTy {
1540     OpenMPScheduleClauseKind Kind;
1541     OpenMPScheduleClauseModifier M1;
1542     OpenMPScheduleClauseModifier M2;
1543     ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
1544                             OpenMPScheduleClauseModifier M1,
1545                             OpenMPScheduleClauseModifier M2)
1546         : Kind(Kind), M1(M1), M2(M2) {}
1547   };
1548 } // namespace
1549 
1550 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
1551   // Emit the loop iteration variable.
1552   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
1553   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
1554   EmitVarDecl(*IVDecl);
1555 
1556   // Emit the iterations count variable.
1557   // If it is not a variable, Sema decided to calculate iterations count on each
1558   // iteration (e.g., it is foldable into a constant).
1559   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1560     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1561     // Emit calculation of the iterations count.
1562     EmitIgnoredExpr(S.getCalcLastIteration());
1563   }
1564 
1565   auto &RT = CGM.getOpenMPRuntime();
1566 
1567   bool HasLastprivateClause;
1568   // Check pre-condition.
1569   {
1570     // Skip the entire loop if we don't meet the precondition.
1571     // If the condition constant folds and can be elided, avoid emitting the
1572     // whole loop.
1573     bool CondConstant;
1574     llvm::BasicBlock *ContBlock = nullptr;
1575     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1576       if (!CondConstant)
1577         return false;
1578     } else {
1579       auto *ThenBlock = createBasicBlock("omp.precond.then");
1580       ContBlock = createBasicBlock("omp.precond.end");
1581       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
1582                   getProfileCount(&S));
1583       EmitBlock(ThenBlock);
1584       incrementProfileCounter(&S);
1585     }
1586 
1587     emitAlignedClause(*this, S);
1588     EmitOMPLinearClauseInit(S);
1589     // Emit 'then' code.
1590     {
1591       // Emit helper vars inits.
1592       LValue LB =
1593           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
1594       LValue UB =
1595           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
1596       LValue ST =
1597           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
1598       LValue IL =
1599           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
1600 
1601       OMPPrivateScope LoopScope(*this);
1602       if (EmitOMPFirstprivateClause(S, LoopScope)) {
1603         // Emit implicit barrier to synchronize threads and avoid data races on
1604         // initialization of firstprivate variables and post-update of
1605         // lastprivate variables.
1606         CGM.getOpenMPRuntime().emitBarrierCall(
1607             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1608             /*ForceSimpleCall=*/true);
1609       }
1610       EmitOMPPrivateClause(S, LoopScope);
1611       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
1612       EmitOMPReductionClauseInit(S, LoopScope);
1613       emitPrivateLoopCounters(*this, LoopScope, S.counters(),
1614                               S.private_counters());
1615       emitPrivateLinearVars(*this, S, LoopScope);
1616       (void)LoopScope.Privatize();
1617 
1618       // Detect the loop schedule kind and chunk.
1619       llvm::Value *Chunk = nullptr;
1620       OpenMPScheduleClauseKind ScheduleKind = OMPC_SCHEDULE_unknown;
1621       OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown;
1622       OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;
1623       if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
1624         ScheduleKind = C->getScheduleKind();
1625         M1 = C->getFirstScheduleModifier();
1626         M2 = C->getSecondScheduleModifier();
1627         if (const auto *Ch = C->getChunkSize()) {
1628           Chunk = EmitScalarExpr(Ch);
1629           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
1630                                        S.getIterationVariable()->getType(),
1631                                        S.getLocStart());
1632         }
1633       }
1634       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1635       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1636       const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
1637       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
1638       // If the static schedule kind is specified or if the ordered clause is
1639       // specified, and if no monotonic modifier is specified, the effect will
1640       // be as if the monotonic modifier was specified.
1641       if (RT.isStaticNonchunked(ScheduleKind,
1642                                 /* Chunked */ Chunk != nullptr) &&
1643           !Ordered) {
1644         if (isOpenMPSimdDirective(S.getDirectiveKind()))
1645           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
1646         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1647         // When no chunk_size is specified, the iteration space is divided into
1648         // chunks that are approximately equal in size, and at most one chunk is
1649         // distributed to each thread. Note that the size of the chunks is
1650         // unspecified in this case.
1651         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
1652                              IVSize, IVSigned, Ordered,
1653                              IL.getAddress(), LB.getAddress(),
1654                              UB.getAddress(), ST.getAddress());
1655         auto LoopExit =
1656             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
1657         // UB = min(UB, GlobalUB);
1658         EmitIgnoredExpr(S.getEnsureUpperBound());
1659         // IV = LB;
1660         EmitIgnoredExpr(S.getInit());
1661         // while (idx <= UB) { BODY; ++idx; }
1662         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1663                          S.getInc(),
1664                          [&S, LoopExit](CodeGenFunction &CGF) {
1665                            CGF.EmitOMPLoopBody(S, LoopExit);
1666                            CGF.EmitStopPoint(&S);
1667                          },
1668                          [](CodeGenFunction &) {});
1669         EmitBlock(LoopExit.getBlock());
1670         // Tell the runtime we are done.
1671         RT.emitForStaticFinish(*this, S.getLocStart());
1672       } else {
1673         const bool IsMonotonic = Ordered ||
1674                                  ScheduleKind == OMPC_SCHEDULE_static ||
1675                                  ScheduleKind == OMPC_SCHEDULE_unknown ||
1676                                  M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
1677                                  M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
1678         // Emit the outer loop, which requests its work chunk [LB..UB] from
1679         // runtime and runs the inner loop to process it.
1680         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
1681                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
1682                             IL.getAddress(), Chunk);
1683       }
1684       EmitOMPReductionClauseFinal(S);
1685       // Emit final copy of the lastprivate variables if IsLastIter != 0.
1686       if (HasLastprivateClause)
1687         EmitOMPLastprivateClauseFinal(
1688             S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
1689     }
1690     if (isOpenMPSimdDirective(S.getDirectiveKind())) {
1691       EmitOMPSimdFinal(S);
1692     }
1693     // We're now done with the loop, so jump to the continuation block.
1694     if (ContBlock) {
1695       EmitBranch(ContBlock);
1696       EmitBlock(ContBlock, true);
1697     }
1698   }
1699   return HasLastprivateClause;
1700 }
1701 
1702 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
1703   bool HasLastprivates = false;
1704   {
1705     OMPLexicalScope Scope(*this, S);
1706     auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
1707       HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
1708     };
1709     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
1710                                                 S.hasCancel());
1711   }
1712 
1713   // Emit an implicit barrier at the end.
1714   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
1715     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
1716   }
1717 }
1718 
1719 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
1720   bool HasLastprivates = false;
1721   {
1722     OMPLexicalScope Scope(*this, S);
1723     auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
1724       HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
1725     };
1726     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1727   }
1728 
1729   // Emit an implicit barrier at the end.
1730   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
1731     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
1732   }
1733 }
1734 
1735 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
1736                                 const Twine &Name,
1737                                 llvm::Value *Init = nullptr) {
1738   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
1739   if (Init)
1740     CGF.EmitScalarInit(Init, LVal);
1741   return LVal;
1742 }
1743 
1744 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
1745   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
1746   auto *CS = dyn_cast<CompoundStmt>(Stmt);
1747   bool HasLastprivates = false;
1748   auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) {
1749     auto &C = CGF.CGM.getContext();
1750     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1751     // Emit helper vars inits.
1752     LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
1753                                   CGF.Builder.getInt32(0));
1754     auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
1755                                       : CGF.Builder.getInt32(0);
1756     LValue UB =
1757         createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
1758     LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
1759                                   CGF.Builder.getInt32(1));
1760     LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
1761                                   CGF.Builder.getInt32(0));
1762     // Loop counter.
1763     LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
1764     OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
1765     CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
1766     OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
1767     CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
1768     // Generate condition for loop.
1769     BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
1770                         OK_Ordinary, S.getLocStart(),
1771                         /*fpContractable=*/false);
1772     // Increment for loop counter.
1773     UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
1774                       S.getLocStart());
1775     auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
1776       // Iterate through all sections and emit a switch construct:
1777       // switch (IV) {
1778       //   case 0:
1779       //     <SectionStmt[0]>;
1780       //     break;
1781       // ...
1782       //   case <NumSection> - 1:
1783       //     <SectionStmt[<NumSection> - 1]>;
1784       //     break;
1785       // }
1786       // .omp.sections.exit:
1787       auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
1788       auto *SwitchStmt = CGF.Builder.CreateSwitch(
1789           CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
1790           CS == nullptr ? 1 : CS->size());
1791       if (CS) {
1792         unsigned CaseNumber = 0;
1793         for (auto *SubStmt : CS->children()) {
1794           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
1795           CGF.EmitBlock(CaseBB);
1796           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
1797           CGF.EmitStmt(SubStmt);
1798           CGF.EmitBranch(ExitBB);
1799           ++CaseNumber;
1800         }
1801       } else {
1802         auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
1803         CGF.EmitBlock(CaseBB);
1804         SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
1805         CGF.EmitStmt(Stmt);
1806         CGF.EmitBranch(ExitBB);
1807       }
1808       CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
1809     };
1810 
1811     CodeGenFunction::OMPPrivateScope LoopScope(CGF);
1812     if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
1813       // Emit implicit barrier to synchronize threads and avoid data races on
1814       // initialization of firstprivate variables and post-update of lastprivate
1815       // variables.
1816       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1817           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1818           /*ForceSimpleCall=*/true);
1819     }
1820     CGF.EmitOMPPrivateClause(S, LoopScope);
1821     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1822     CGF.EmitOMPReductionClauseInit(S, LoopScope);
1823     (void)LoopScope.Privatize();
1824 
1825     // Emit static non-chunked loop.
1826     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
1827         CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
1828         /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
1829         UB.getAddress(), ST.getAddress());
1830     // UB = min(UB, GlobalUB);
1831     auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
1832     auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
1833         CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
1834     CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
1835     // IV = LB;
1836     CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
1837     // while (idx <= UB) { BODY; ++idx; }
1838     CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
1839                          [](CodeGenFunction &) {});
1840     // Tell the runtime we are done.
1841     CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
1842     CGF.EmitOMPReductionClauseFinal(S);
1843 
1844     // Emit final copy of the lastprivate variables if IsLastIter != 0.
1845     if (HasLastprivates)
1846       CGF.EmitOMPLastprivateClauseFinal(
1847           S, CGF.Builder.CreateIsNotNull(
1848                  CGF.EmitLoadOfScalar(IL, S.getLocStart())));
1849   };
1850 
1851   bool HasCancel = false;
1852   if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
1853     HasCancel = OSD->hasCancel();
1854   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
1855     HasCancel = OPSD->hasCancel();
1856   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
1857                                               HasCancel);
1858   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
1859   // clause. Otherwise the barrier will be generated by the codegen for the
1860   // directive.
1861   if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
1862     // Emit implicit barrier to synchronize threads and avoid data races on
1863     // initialization of firstprivate variables.
1864     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
1865                                            OMPD_unknown);
1866   }
1867 }
1868 
1869 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
1870   {
1871     OMPLexicalScope Scope(*this, S);
1872     EmitSections(S);
1873   }
1874   // Emit an implicit barrier at the end.
1875   if (!S.getSingleClause<OMPNowaitClause>()) {
1876     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
1877                                            OMPD_sections);
1878   }
1879 }
1880 
1881 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
1882   OMPLexicalScope Scope(*this, S);
1883   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1884     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1885   };
1886   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
1887                                               S.hasCancel());
1888 }
1889 
1890 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
1891   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
1892   llvm::SmallVector<const Expr *, 8> DestExprs;
1893   llvm::SmallVector<const Expr *, 8> SrcExprs;
1894   llvm::SmallVector<const Expr *, 8> AssignmentOps;
1895   // Check if there are any 'copyprivate' clauses associated with this
1896   // 'single' construct.
1897   // Build a list of copyprivate variables along with helper expressions
1898   // (<source>, <destination>, <destination>=<source> expressions)
1899   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
1900     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
1901     DestExprs.append(C->destination_exprs().begin(),
1902                      C->destination_exprs().end());
1903     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
1904     AssignmentOps.append(C->assignment_ops().begin(),
1905                          C->assignment_ops().end());
1906   }
1907   {
1908     OMPLexicalScope Scope(*this, S);
1909     // Emit code for 'single' region along with 'copyprivate' clauses
1910     auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1911       CodeGenFunction::OMPPrivateScope SingleScope(CGF);
1912       (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
1913       CGF.EmitOMPPrivateClause(S, SingleScope);
1914       (void)SingleScope.Privatize();
1915       CGF.EmitStmt(
1916           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1917     };
1918     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
1919                                             CopyprivateVars, DestExprs,
1920                                             SrcExprs, AssignmentOps);
1921   }
1922   // Emit an implicit barrier at the end (to avoid data race on firstprivate
1923   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
1924   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
1925     CGM.getOpenMPRuntime().emitBarrierCall(
1926         *this, S.getLocStart(),
1927         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
1928   }
1929 }
1930 
1931 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
1932   OMPLexicalScope Scope(*this, S);
1933   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1934     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1935   };
1936   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
1937 }
1938 
1939 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
1940   OMPLexicalScope Scope(*this, S);
1941   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1942     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1943   };
1944   Expr *Hint = nullptr;
1945   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
1946     Hint = HintClause->getHint();
1947   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
1948                                             S.getDirectiveName().getAsString(),
1949                                             CodeGen, S.getLocStart(), Hint);
1950 }
1951 
1952 void CodeGenFunction::EmitOMPParallelForDirective(
1953     const OMPParallelForDirective &S) {
1954   // Emit directive as a combined directive that consists of two implicit
1955   // directives: 'parallel' with 'for' directive.
1956   OMPLexicalScope Scope(*this, S);
1957   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1958     CGF.EmitOMPWorksharingLoop(S);
1959   };
1960   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
1961 }
1962 
1963 void CodeGenFunction::EmitOMPParallelForSimdDirective(
1964     const OMPParallelForSimdDirective &S) {
1965   // Emit directive as a combined directive that consists of two implicit
1966   // directives: 'parallel' with 'for' directive.
1967   OMPLexicalScope Scope(*this, S);
1968   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
1969     CGF.EmitOMPWorksharingLoop(S);
1970   };
1971   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
1972 }
1973 
1974 void CodeGenFunction::EmitOMPParallelSectionsDirective(
1975     const OMPParallelSectionsDirective &S) {
1976   // Emit directive as a combined directive that consists of two implicit
1977   // directives: 'parallel' with 'sections' directive.
1978   OMPLexicalScope Scope(*this, S);
1979   auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitSections(S); };
1980   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
1981 }
1982 
1983 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
1984   // Emit outlined function for task construct.
1985   OMPLexicalScope Scope(*this, S);
1986   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
1987   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
1988   auto *I = CS->getCapturedDecl()->param_begin();
1989   auto *PartId = std::next(I);
1990   // The first function argument for tasks is a thread id, the second one is a
1991   // part id (0 for tied tasks, >=0 for untied task).
1992   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
1993   // Get list of private variables.
1994   llvm::SmallVector<const Expr *, 8> PrivateVars;
1995   llvm::SmallVector<const Expr *, 8> PrivateCopies;
1996   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
1997     auto IRef = C->varlist_begin();
1998     for (auto *IInit : C->private_copies()) {
1999       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2000       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2001         PrivateVars.push_back(*IRef);
2002         PrivateCopies.push_back(IInit);
2003       }
2004       ++IRef;
2005     }
2006   }
2007   EmittedAsPrivate.clear();
2008   // Get list of firstprivate variables.
2009   llvm::SmallVector<const Expr *, 8> FirstprivateVars;
2010   llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
2011   llvm::SmallVector<const Expr *, 8> FirstprivateInits;
2012   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2013     auto IRef = C->varlist_begin();
2014     auto IElemInitRef = C->inits().begin();
2015     for (auto *IInit : C->private_copies()) {
2016       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2017       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2018         FirstprivateVars.push_back(*IRef);
2019         FirstprivateCopies.push_back(IInit);
2020         FirstprivateInits.push_back(*IElemInitRef);
2021       }
2022       ++IRef;
2023       ++IElemInitRef;
2024     }
2025   }
2026   // Build list of dependences.
2027   llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
2028       Dependences;
2029   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
2030     for (auto *IRef : C->varlists()) {
2031       Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
2032     }
2033   }
2034   auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
2035       CodeGenFunction &CGF) {
2036     // Set proper addresses for generated private copies.
2037     auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
2038     OMPPrivateScope Scope(CGF);
2039     if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
2040       auto *CopyFn = CGF.Builder.CreateLoad(
2041           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
2042       auto *PrivatesPtr = CGF.Builder.CreateLoad(
2043           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
2044       // Map privates.
2045       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16>
2046           PrivatePtrs;
2047       llvm::SmallVector<llvm::Value *, 16> CallArgs;
2048       CallArgs.push_back(PrivatesPtr);
2049       for (auto *E : PrivateVars) {
2050         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2051         Address PrivatePtr =
2052             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
2053         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2054         CallArgs.push_back(PrivatePtr.getPointer());
2055       }
2056       for (auto *E : FirstprivateVars) {
2057         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2058         Address PrivatePtr =
2059             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
2060         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
2061         CallArgs.push_back(PrivatePtr.getPointer());
2062       }
2063       CGF.EmitRuntimeCall(CopyFn, CallArgs);
2064       for (auto &&Pair : PrivatePtrs) {
2065         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
2066                             CGF.getContext().getDeclAlign(Pair.first));
2067         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
2068       }
2069     }
2070     (void)Scope.Privatize();
2071     if (*PartId) {
2072       // TODO: emit code for untied tasks.
2073     }
2074     CGF.EmitStmt(CS->getCapturedStmt());
2075   };
2076   auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
2077       S, *I, OMPD_task, CodeGen);
2078   // Check if we should emit tied or untied task.
2079   bool Tied = !S.getSingleClause<OMPUntiedClause>();
2080   // Check if the task is final
2081   llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
2082   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2083     // If the condition constant folds and can be elided, try to avoid emitting
2084     // the condition and the dead arm of the if/else.
2085     auto *Cond = Clause->getCondition();
2086     bool CondConstant;
2087     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2088       Final.setInt(CondConstant);
2089     else
2090       Final.setPointer(EvaluateExprAsBool(Cond));
2091   } else {
2092     // By default the task is not final.
2093     Final.setInt(/*IntVal=*/false);
2094   }
2095   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
2096   const Expr *IfCond = nullptr;
2097   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2098     if (C->getNameModifier() == OMPD_unknown ||
2099         C->getNameModifier() == OMPD_task) {
2100       IfCond = C->getCondition();
2101       break;
2102     }
2103   }
2104   CGM.getOpenMPRuntime().emitTaskCall(
2105       *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
2106       CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
2107       FirstprivateCopies, FirstprivateInits, Dependences);
2108 }
2109 
2110 void CodeGenFunction::EmitOMPTaskyieldDirective(
2111     const OMPTaskyieldDirective &S) {
2112   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
2113 }
2114 
2115 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
2116   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
2117 }
2118 
2119 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
2120   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
2121 }
2122 
2123 void CodeGenFunction::EmitOMPTaskgroupDirective(
2124     const OMPTaskgroupDirective &S) {
2125   OMPLexicalScope Scope(*this, S);
2126   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2127     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2128   };
2129   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
2130 }
2131 
2132 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
2133   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
2134     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
2135       return llvm::makeArrayRef(FlushClause->varlist_begin(),
2136                                 FlushClause->varlist_end());
2137     }
2138     return llvm::None;
2139   }(), S.getLocStart());
2140 }
2141 
2142 void CodeGenFunction::EmitOMPDistributeDirective(
2143     const OMPDistributeDirective &S) {
2144   llvm_unreachable("CodeGen for 'omp distribute' is not supported yet.");
2145 }
2146 
2147 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
2148                                                    const CapturedStmt *S) {
2149   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
2150   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
2151   CGF.CapturedStmtInfo = &CapStmtInfo;
2152   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
2153   Fn->addFnAttr(llvm::Attribute::NoInline);
2154   return Fn;
2155 }
2156 
2157 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
2158   if (!S.getAssociatedStmt())
2159     return;
2160   OMPLexicalScope Scope(*this, S);
2161   auto *C = S.getSingleClause<OMPSIMDClause>();
2162   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
2163     if (C) {
2164       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2165       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2166       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
2167       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
2168       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
2169     } else {
2170       CGF.EmitStmt(
2171           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2172     }
2173   };
2174   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
2175 }
2176 
2177 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
2178                                          QualType SrcType, QualType DestType,
2179                                          SourceLocation Loc) {
2180   assert(CGF.hasScalarEvaluationKind(DestType) &&
2181          "DestType must have scalar evaluation kind.");
2182   assert(!Val.isAggregate() && "Must be a scalar or complex.");
2183   return Val.isScalar()
2184              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
2185                                         Loc)
2186              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
2187                                                  DestType, Loc);
2188 }
2189 
2190 static CodeGenFunction::ComplexPairTy
2191 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
2192                       QualType DestType, SourceLocation Loc) {
2193   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
2194          "DestType must have complex evaluation kind.");
2195   CodeGenFunction::ComplexPairTy ComplexVal;
2196   if (Val.isScalar()) {
2197     // Convert the input element to the element type of the complex.
2198     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2199     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
2200                                               DestElementType, Loc);
2201     ComplexVal = CodeGenFunction::ComplexPairTy(
2202         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
2203   } else {
2204     assert(Val.isComplex() && "Must be a scalar or complex.");
2205     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
2206     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
2207     ComplexVal.first = CGF.EmitScalarConversion(
2208         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
2209     ComplexVal.second = CGF.EmitScalarConversion(
2210         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
2211   }
2212   return ComplexVal;
2213 }
2214 
2215 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
2216                                   LValue LVal, RValue RVal) {
2217   if (LVal.isGlobalReg()) {
2218     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
2219   } else {
2220     CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
2221                                              : llvm::Monotonic,
2222                         LVal.isVolatile(), /*IsInit=*/false);
2223   }
2224 }
2225 
2226 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
2227                                          QualType RValTy, SourceLocation Loc) {
2228   switch (getEvaluationKind(LVal.getType())) {
2229   case TEK_Scalar:
2230     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
2231                                *this, RVal, RValTy, LVal.getType(), Loc)),
2232                            LVal);
2233     break;
2234   case TEK_Complex:
2235     EmitStoreOfComplex(
2236         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
2237         /*isInit=*/false);
2238     break;
2239   case TEK_Aggregate:
2240     llvm_unreachable("Must be a scalar or complex.");
2241   }
2242 }
2243 
2244 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
2245                                   const Expr *X, const Expr *V,
2246                                   SourceLocation Loc) {
2247   // v = x;
2248   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
2249   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
2250   LValue XLValue = CGF.EmitLValue(X);
2251   LValue VLValue = CGF.EmitLValue(V);
2252   RValue Res = XLValue.isGlobalReg()
2253                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
2254                    : CGF.EmitAtomicLoad(XLValue, Loc,
2255                                         IsSeqCst ? llvm::SequentiallyConsistent
2256                                                  : llvm::Monotonic,
2257                                         XLValue.isVolatile());
2258   // OpenMP, 2.12.6, atomic Construct
2259   // Any atomic construct with a seq_cst clause forces the atomically
2260   // performed operation to include an implicit flush operation without a
2261   // list.
2262   if (IsSeqCst)
2263     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2264   CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
2265 }
2266 
2267 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
2268                                    const Expr *X, const Expr *E,
2269                                    SourceLocation Loc) {
2270   // x = expr;
2271   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
2272   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
2273   // OpenMP, 2.12.6, atomic Construct
2274   // Any atomic construct with a seq_cst clause forces the atomically
2275   // performed operation to include an implicit flush operation without a
2276   // list.
2277   if (IsSeqCst)
2278     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2279 }
2280 
2281 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
2282                                                 RValue Update,
2283                                                 BinaryOperatorKind BO,
2284                                                 llvm::AtomicOrdering AO,
2285                                                 bool IsXLHSInRHSPart) {
2286   auto &Context = CGF.CGM.getContext();
2287   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
2288   // expression is simple and atomic is allowed for the given type for the
2289   // target platform.
2290   if (BO == BO_Comma || !Update.isScalar() ||
2291       !Update.getScalarVal()->getType()->isIntegerTy() ||
2292       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
2293                         (Update.getScalarVal()->getType() !=
2294                          X.getAddress().getElementType())) ||
2295       !X.getAddress().getElementType()->isIntegerTy() ||
2296       !Context.getTargetInfo().hasBuiltinAtomic(
2297           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
2298     return std::make_pair(false, RValue::get(nullptr));
2299 
2300   llvm::AtomicRMWInst::BinOp RMWOp;
2301   switch (BO) {
2302   case BO_Add:
2303     RMWOp = llvm::AtomicRMWInst::Add;
2304     break;
2305   case BO_Sub:
2306     if (!IsXLHSInRHSPart)
2307       return std::make_pair(false, RValue::get(nullptr));
2308     RMWOp = llvm::AtomicRMWInst::Sub;
2309     break;
2310   case BO_And:
2311     RMWOp = llvm::AtomicRMWInst::And;
2312     break;
2313   case BO_Or:
2314     RMWOp = llvm::AtomicRMWInst::Or;
2315     break;
2316   case BO_Xor:
2317     RMWOp = llvm::AtomicRMWInst::Xor;
2318     break;
2319   case BO_LT:
2320     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2321                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
2322                                    : llvm::AtomicRMWInst::Max)
2323                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
2324                                    : llvm::AtomicRMWInst::UMax);
2325     break;
2326   case BO_GT:
2327     RMWOp = X.getType()->hasSignedIntegerRepresentation()
2328                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
2329                                    : llvm::AtomicRMWInst::Min)
2330                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
2331                                    : llvm::AtomicRMWInst::UMin);
2332     break;
2333   case BO_Assign:
2334     RMWOp = llvm::AtomicRMWInst::Xchg;
2335     break;
2336   case BO_Mul:
2337   case BO_Div:
2338   case BO_Rem:
2339   case BO_Shl:
2340   case BO_Shr:
2341   case BO_LAnd:
2342   case BO_LOr:
2343     return std::make_pair(false, RValue::get(nullptr));
2344   case BO_PtrMemD:
2345   case BO_PtrMemI:
2346   case BO_LE:
2347   case BO_GE:
2348   case BO_EQ:
2349   case BO_NE:
2350   case BO_AddAssign:
2351   case BO_SubAssign:
2352   case BO_AndAssign:
2353   case BO_OrAssign:
2354   case BO_XorAssign:
2355   case BO_MulAssign:
2356   case BO_DivAssign:
2357   case BO_RemAssign:
2358   case BO_ShlAssign:
2359   case BO_ShrAssign:
2360   case BO_Comma:
2361     llvm_unreachable("Unsupported atomic update operation");
2362   }
2363   auto *UpdateVal = Update.getScalarVal();
2364   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
2365     UpdateVal = CGF.Builder.CreateIntCast(
2366         IC, X.getAddress().getElementType(),
2367         X.getType()->hasSignedIntegerRepresentation());
2368   }
2369   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
2370   return std::make_pair(true, RValue::get(Res));
2371 }
2372 
2373 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
2374     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
2375     llvm::AtomicOrdering AO, SourceLocation Loc,
2376     const llvm::function_ref<RValue(RValue)> &CommonGen) {
2377   // Update expressions are allowed to have the following forms:
2378   // x binop= expr; -> xrval + expr;
2379   // x++, ++x -> xrval + 1;
2380   // x--, --x -> xrval - 1;
2381   // x = x binop expr; -> xrval binop expr
2382   // x = expr Op x; - > expr binop xrval;
2383   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
2384   if (!Res.first) {
2385     if (X.isGlobalReg()) {
2386       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
2387       // 'xrval'.
2388       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
2389     } else {
2390       // Perform compare-and-swap procedure.
2391       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
2392     }
2393   }
2394   return Res;
2395 }
2396 
2397 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
2398                                     const Expr *X, const Expr *E,
2399                                     const Expr *UE, bool IsXLHSInRHSPart,
2400                                     SourceLocation Loc) {
2401   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2402          "Update expr in 'atomic update' must be a binary operator.");
2403   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2404   // Update expressions are allowed to have the following forms:
2405   // x binop= expr; -> xrval + expr;
2406   // x++, ++x -> xrval + 1;
2407   // x--, --x -> xrval - 1;
2408   // x = x binop expr; -> xrval binop expr
2409   // x = expr Op x; - > expr binop xrval;
2410   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
2411   LValue XLValue = CGF.EmitLValue(X);
2412   RValue ExprRValue = CGF.EmitAnyExpr(E);
2413   auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
2414   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2415   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2416   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2417   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2418   auto Gen =
2419       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
2420         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2421         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2422         return CGF.EmitAnyExpr(UE);
2423       };
2424   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
2425       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2426   // OpenMP, 2.12.6, atomic Construct
2427   // Any atomic construct with a seq_cst clause forces the atomically
2428   // performed operation to include an implicit flush operation without a
2429   // list.
2430   if (IsSeqCst)
2431     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2432 }
2433 
2434 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
2435                             QualType SourceType, QualType ResType,
2436                             SourceLocation Loc) {
2437   switch (CGF.getEvaluationKind(ResType)) {
2438   case TEK_Scalar:
2439     return RValue::get(
2440         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
2441   case TEK_Complex: {
2442     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
2443     return RValue::getComplex(Res.first, Res.second);
2444   }
2445   case TEK_Aggregate:
2446     break;
2447   }
2448   llvm_unreachable("Must be a scalar or complex.");
2449 }
2450 
2451 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
2452                                      bool IsPostfixUpdate, const Expr *V,
2453                                      const Expr *X, const Expr *E,
2454                                      const Expr *UE, bool IsXLHSInRHSPart,
2455                                      SourceLocation Loc) {
2456   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
2457   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
2458   RValue NewVVal;
2459   LValue VLValue = CGF.EmitLValue(V);
2460   LValue XLValue = CGF.EmitLValue(X);
2461   RValue ExprRValue = CGF.EmitAnyExpr(E);
2462   auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
2463   QualType NewVValType;
2464   if (UE) {
2465     // 'x' is updated with some additional value.
2466     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
2467            "Update expr in 'atomic capture' must be a binary operator.");
2468     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
2469     // Update expressions are allowed to have the following forms:
2470     // x binop= expr; -> xrval + expr;
2471     // x++, ++x -> xrval + 1;
2472     // x--, --x -> xrval - 1;
2473     // x = x binop expr; -> xrval binop expr
2474     // x = expr Op x; - > expr binop xrval;
2475     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
2476     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
2477     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
2478     NewVValType = XRValExpr->getType();
2479     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
2480     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
2481                   IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
2482       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2483       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
2484       RValue Res = CGF.EmitAnyExpr(UE);
2485       NewVVal = IsPostfixUpdate ? XRValue : Res;
2486       return Res;
2487     };
2488     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
2489         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
2490     if (Res.first) {
2491       // 'atomicrmw' instruction was generated.
2492       if (IsPostfixUpdate) {
2493         // Use old value from 'atomicrmw'.
2494         NewVVal = Res.second;
2495       } else {
2496         // 'atomicrmw' does not provide new value, so evaluate it using old
2497         // value of 'x'.
2498         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
2499         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
2500         NewVVal = CGF.EmitAnyExpr(UE);
2501       }
2502     }
2503   } else {
2504     // 'x' is simply rewritten with some 'expr'.
2505     NewVValType = X->getType().getNonReferenceType();
2506     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
2507                                X->getType().getNonReferenceType(), Loc);
2508     auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
2509       NewVVal = XRValue;
2510       return ExprRValue;
2511     };
2512     // Try to perform atomicrmw xchg, otherwise simple exchange.
2513     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
2514         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
2515         Loc, Gen);
2516     if (Res.first) {
2517       // 'atomicrmw' instruction was generated.
2518       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
2519     }
2520   }
2521   // Emit post-update store to 'v' of old/new 'x' value.
2522   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
2523   // OpenMP, 2.12.6, atomic Construct
2524   // Any atomic construct with a seq_cst clause forces the atomically
2525   // performed operation to include an implicit flush operation without a
2526   // list.
2527   if (IsSeqCst)
2528     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
2529 }
2530 
2531 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
2532                               bool IsSeqCst, bool IsPostfixUpdate,
2533                               const Expr *X, const Expr *V, const Expr *E,
2534                               const Expr *UE, bool IsXLHSInRHSPart,
2535                               SourceLocation Loc) {
2536   switch (Kind) {
2537   case OMPC_read:
2538     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
2539     break;
2540   case OMPC_write:
2541     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
2542     break;
2543   case OMPC_unknown:
2544   case OMPC_update:
2545     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
2546     break;
2547   case OMPC_capture:
2548     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
2549                              IsXLHSInRHSPart, Loc);
2550     break;
2551   case OMPC_if:
2552   case OMPC_final:
2553   case OMPC_num_threads:
2554   case OMPC_private:
2555   case OMPC_firstprivate:
2556   case OMPC_lastprivate:
2557   case OMPC_reduction:
2558   case OMPC_safelen:
2559   case OMPC_simdlen:
2560   case OMPC_collapse:
2561   case OMPC_default:
2562   case OMPC_seq_cst:
2563   case OMPC_shared:
2564   case OMPC_linear:
2565   case OMPC_aligned:
2566   case OMPC_copyin:
2567   case OMPC_copyprivate:
2568   case OMPC_flush:
2569   case OMPC_proc_bind:
2570   case OMPC_schedule:
2571   case OMPC_ordered:
2572   case OMPC_nowait:
2573   case OMPC_untied:
2574   case OMPC_threadprivate:
2575   case OMPC_depend:
2576   case OMPC_mergeable:
2577   case OMPC_device:
2578   case OMPC_threads:
2579   case OMPC_simd:
2580   case OMPC_map:
2581   case OMPC_num_teams:
2582   case OMPC_thread_limit:
2583   case OMPC_priority:
2584   case OMPC_grainsize:
2585   case OMPC_nogroup:
2586   case OMPC_num_tasks:
2587   case OMPC_hint:
2588   case OMPC_dist_schedule:
2589   case OMPC_defaultmap:
2590     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
2591   }
2592 }
2593 
2594 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
2595   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
2596   OpenMPClauseKind Kind = OMPC_unknown;
2597   for (auto *C : S.clauses()) {
2598     // Find first clause (skip seq_cst clause, if it is first).
2599     if (C->getClauseKind() != OMPC_seq_cst) {
2600       Kind = C->getClauseKind();
2601       break;
2602     }
2603   }
2604 
2605   const auto *CS =
2606       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
2607   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
2608     enterFullExpression(EWC);
2609   }
2610   // Processing for statements under 'atomic capture'.
2611   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
2612     for (const auto *C : Compound->body()) {
2613       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
2614         enterFullExpression(EWC);
2615       }
2616     }
2617   }
2618 
2619   OMPLexicalScope Scope(*this, S);
2620   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) {
2621     CGF.EmitStopPoint(CS);
2622     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
2623                       S.getV(), S.getExpr(), S.getUpdateExpr(),
2624                       S.isXLHSInRHSPart(), S.getLocStart());
2625   };
2626   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
2627 }
2628 
2629 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
2630   OMPLexicalScope Scope(*this, S);
2631   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
2632 
2633   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
2634   GenerateOpenMPCapturedVars(CS, CapturedVars);
2635 
2636   llvm::Function *Fn = nullptr;
2637   llvm::Constant *FnID = nullptr;
2638 
2639   // Check if we have any if clause associated with the directive.
2640   const Expr *IfCond = nullptr;
2641 
2642   if (auto *C = S.getSingleClause<OMPIfClause>()) {
2643     IfCond = C->getCondition();
2644   }
2645 
2646   // Check if we have any device clause associated with the directive.
2647   const Expr *Device = nullptr;
2648   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
2649     Device = C->getDevice();
2650   }
2651 
2652   // Check if we have an if clause whose conditional always evaluates to false
2653   // or if we do not have any targets specified. If so the target region is not
2654   // an offload entry point.
2655   bool IsOffloadEntry = true;
2656   if (IfCond) {
2657     bool Val;
2658     if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
2659       IsOffloadEntry = false;
2660   }
2661   if (CGM.getLangOpts().OMPTargetTriples.empty())
2662     IsOffloadEntry = false;
2663 
2664   assert(CurFuncDecl && "No parent declaration for target region!");
2665   StringRef ParentName;
2666   // In case we have Ctors/Dtors we use the complete type variant to produce
2667   // the mangling of the device outlined kernel.
2668   if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
2669     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
2670   else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
2671     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
2672   else
2673     ParentName =
2674         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
2675 
2676   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
2677                                                     IsOffloadEntry);
2678 
2679   CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
2680                                         CapturedVars);
2681 }
2682 
2683 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
2684   llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
2685 }
2686 
2687 void CodeGenFunction::EmitOMPCancellationPointDirective(
2688     const OMPCancellationPointDirective &S) {
2689   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
2690                                                    S.getCancelRegion());
2691 }
2692 
2693 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
2694   const Expr *IfCond = nullptr;
2695   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2696     if (C->getNameModifier() == OMPD_unknown ||
2697         C->getNameModifier() == OMPD_cancel) {
2698       IfCond = C->getCondition();
2699       break;
2700     }
2701   }
2702   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
2703                                         S.getCancelRegion());
2704 }
2705 
2706 CodeGenFunction::JumpDest
2707 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
2708   if (Kind == OMPD_parallel || Kind == OMPD_task)
2709     return ReturnBlock;
2710   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
2711          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
2712   return BreakContinueStack.back().BreakBlock;
2713 }
2714 
2715 // Generate the instructions for '#pragma omp target data' directive.
2716 void CodeGenFunction::EmitOMPTargetDataDirective(
2717     const OMPTargetDataDirective &S) {
2718   // emit the code inside the construct for now
2719   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2720   CGM.getOpenMPRuntime().emitInlinedDirective(
2721       *this, OMPD_target_data,
2722       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
2723 }
2724 
2725 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
2726     const OMPTargetEnterDataDirective &S) {
2727   // TODO: codegen for target enter data.
2728 }
2729 
2730 void CodeGenFunction::EmitOMPTargetExitDataDirective(
2731     const OMPTargetExitDataDirective &S) {
2732   // TODO: codegen for target exit data.
2733 }
2734 
2735 void CodeGenFunction::EmitOMPTargetParallelDirective(
2736     const OMPTargetParallelDirective &S) {
2737   // TODO: codegen for target parallel.
2738 }
2739 
2740 void CodeGenFunction::EmitOMPTargetParallelForDirective(
2741     const OMPTargetParallelForDirective &S) {
2742   // TODO: codegen for target parallel for.
2743 }
2744 
2745 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
2746   // emit the code inside the construct for now
2747   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2748   CGM.getOpenMPRuntime().emitInlinedDirective(
2749       *this, OMPD_taskloop,
2750       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
2751 }
2752 
2753 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
2754     const OMPTaskLoopSimdDirective &S) {
2755   // emit the code inside the construct for now
2756   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2757   CGM.getOpenMPRuntime().emitInlinedDirective(
2758       *this, OMPD_taskloop_simd,
2759       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
2760 }
2761 
2762