1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include "llvm/Support/Format.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <numeric>
43 
44 using namespace clang;
45 using namespace CodeGen;
46 using namespace llvm::omp;
47 
48 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info backed by a captured statement \p CS (used for
  /// outlined/target regions that have an associated CapturedStmt).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no associated captured statement (used for
  /// inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; no-op by default,
  /// overridden for task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (outlined, task, inlined or target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether the caller reported that the region may contain a 'cancel'.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: every OpenMP region info carries the CR_OpenMP kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
110 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable/parameter holding the global thread id;
  /// must be non-null for outlined regions.
  /// \param HelperName Name used for the generated outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only parallel-outlined OpenMP regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the generated outlined helper function.
  StringRef HelperName;
};
143 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id based switch used to resume an
  /// untied task at the correct point after it has been rescheduled.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the task part id (switch selector).
    const VarDecl *PartIDVar;
    /// Codegen run between storing the next part id and jumping back to the
    /// runtime (i.e. the code that performs the actual task switch).
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; cases are appended as switch points are
    /// emitted.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        // Load the current part id through PartIDVar (a pointer parameter)
        // and switch on it. The default destination is a "done" block that
        // returns from the outlined function through any active cleanups;
        // case 0 resumes at the first task part, emitted right after.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: persist the index of the next part in
    /// *PartIDVar, run the task-switch codegen, branch out to the return
    /// block, and register the block that follows as the next switch case so
    /// a re-entered task resumes there.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next case index equals the current number of cases.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied switching to the shared action object.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI support: matches only task-outlined OpenMP regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
232 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer) OpenMP
/// region info, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI The captured-statement info that was active before this
  /// inlined region was entered; restored by the owning RAII object.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region,no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE(review): unlike the other forwarders this consults getOldCSI()
  /// directly (shadowing the OuterRegionInfo member), so a non-OpenMP outer
  /// capture's helper name is reused too — presumably intentional; confirm.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-statement info active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
315 
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique, client-provided name for the target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only target OpenMP regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name of the target region.
  StringRef HelperName;
};
344 
/// Code-generation callback that must never be invoked; used for regions
/// created solely to evaluate captured expressions (no statement body).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      // Only variable captures (by reference or by copy) are of interest.
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; nothing to privatize.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a synthetic reference to the variable so its address can be
      // emitted in the current function and registered as the private copy.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  /// NOTE(review): this forwards to the base class and otherwise returns
  /// nullptr, which is what the base would do anyway — kept for clarity.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
407 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo as the current CapturedStmtInfo on construction
/// and restores the previous state on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state, stashed only when NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda/block capture info of the enclosing
  /// function is hidden for the duration of this region.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash and clear the lambda/block capture state so the inlined region
      // does not see captures of the enclosing function.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Put back the stashed lambda/block capture state.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
450 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
479 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
505 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices into the ident_t struct emitted for source locations.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
546 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
578 
579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580 /// region.
581 class CleanupTy final : public EHScopeStack::Cleanup {
582   PrePostActionTy *Action;
583 
584 public:
585   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587     if (!CGF.HaveInsertPoint())
588       return;
589     Action->Exit(CGF);
590   }
591 };
592 
593 } // anonymous namespace
594 
595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596   CodeGenFunction::RunCleanupsScope Scope(CGF);
597   if (PrePostAction) {
598     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599     Callback(CodeGen, CGF, *PrePostAction);
600   } else {
601     PrePostActionTy Action;
602     Callback(CodeGen, CGF, Action);
603   }
604 }
605 
606 /// Check if the combiner is a call to UDR combiner and if it is so return the
607 /// UDR decl used for reduction.
608 static const OMPDeclareReductionDecl *
609 getReductionInit(const Expr *ReductionOp) {
610   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612       if (const auto *DRE =
613               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615           return DRD;
616   return nullptr;
617 }
618 
/// Emit initialization of \p Private using either the user-defined reduction
/// initializer of \p DRD (if present) or a zero/null constant of type \p Ty.
/// \param InitOp The initializer call expression from the reduction clause.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // UDR with an explicit initializer: call the generated initializer
    // function (Reduction.second) with the private copy bound to the LHS
    // placeholder and the original variable bound to the RHS placeholder.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Map the opaque callee to the initializer function and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a private-linkage global holding
    // the null constant of Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly from the global via an lvalue opaque
      // mapping; no rvalue load is needed.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex: store the loaded rvalue into the private copy.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
674 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element with the
/// user-defined reduction initializer via \p Init; otherwise \p Init is a
/// regular initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration used for element initialization,
/// or null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (UDR only) and destination elements
  // across loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name mirrors the dest GEP's
    // ("omp.arraycpy.dest.element") even though this advances the source
    // pointer; value names are cosmetic in IR.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
764 
// Emit the lvalue for the shared (original) copy of a reduction item by
// delegating to the generic OpenMP shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
768 
769 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
770                                             const Expr *E) {
771   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
772     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
773   return LValue();
774 }
775 
// Emit element-wise initialization of the private aggregate copy for the N-th
// reduction clause. When a declare-reduction initializer applies (it exists,
// or the private variable has no initializer of its own), the reduction op is
// used as the per-element initializer; otherwise the private variable's own
// initializer is used.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
792 
793 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
794                                    ArrayRef<const Expr *> Origs,
795                                    ArrayRef<const Expr *> Privates,
796                                    ArrayRef<const Expr *> ReductionOps) {
797   ClausesData.reserve(Shareds.size());
798   SharedAddresses.reserve(Shareds.size());
799   Sizes.reserve(Shareds.size());
800   BaseDecls.reserve(Shareds.size());
801   const auto *IOrig = Origs.begin();
802   const auto *IPriv = Privates.begin();
803   const auto *IRed = ReductionOps.begin();
804   for (const Expr *Ref : Shareds) {
805     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
806     std::advance(IOrig, 1);
807     std::advance(IPriv, 1);
808     std::advance(IRed, 1);
809   }
810 }
811 
812 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
813   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
814          "Number of generated lvalues must be exactly N.");
815   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
816   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
817   SharedAddresses.emplace_back(First, Second);
818   if (ClausesData[N].Shared == ClausesData[N].Ref) {
819     OrigAddresses.emplace_back(First, Second);
820   } else {
821     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
822     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
823     OrigAddresses.emplace_back(First, Second);
824   }
825 }
826 
// Compute and record the size of the N-th reduction item. For non-VLA types
// this is a compile-time-known byte size; for variably modified types both
// the byte size and the element count are computed at runtime, and the VLA
// size expression is bound so the privatized type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: record byte size only; element count stays null.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, computed from the cached begin/end
    // pointers; byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: byte size is known from the type; derive the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed count so that emitting the
  // variably modified private type picks up the runtime size.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
862 
// Variant of emitAggregateType that uses a caller-provided element count
// \p Size for variably modified types (e.g. when the size was saved earlier
// and reloaded). For fixed-size items \p Size must be null and nothing is
// emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size, then emit the private type so
  // its runtime dimensions are materialized in this function.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
881 
// Emit the initializer for the N-th private reduction copy. Dispatches among:
// (a) element-wise aggregate initialization for array types, (b) the
// declare-reduction (UDR) initializer, and (c) the private variable's own
// initializer. DefaultInit is given the first chance to initialize; its
// return value decides whether the fallback paths run.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Reinterpret the raw private allocation as the private variable's type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Plain private copy with a non-trivial initializer of its own.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
909 
910 bool ReductionCodeGen::needCleanups(unsigned N) {
911   const auto *PrivateVD =
912       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
913   QualType PrivateType = PrivateVD->getType();
914   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
915   return DTorKind != QualType::DK_none;
916 }
917 
918 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
919                                     Address PrivateAddr) {
920   const auto *PrivateVD =
921       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922   QualType PrivateType = PrivateVD->getType();
923   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
924   if (needCleanups(N)) {
925     PrivateAddr = CGF.Builder.CreateElementBitCast(
926         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
927     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
928   }
929 }
930 
// Starting from BaseLV (of type BaseTy), load through each level of pointer/
// reference indirection until the pointee type matches ElTy, then return an
// lvalue at the final address reinterpreted with ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load one level of indirection; pointers and references need different
    // load helpers.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Preserve the original lvalue's type/base/TBAA info at the new address.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
950 
// Inverse of loadToBegin: given the adjusted element address Addr, rebuild a
// chain of temporaries so that loading through the same levels of pointer/
// reference indirection as BaseTy yields Addr. Returns the outermost
// temporary (or Addr itself cast to BaseLVType when no indirection exists).
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each outer temp stores a pointer
    // to the next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temp; the outermost temp
    // is what callers dereference.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
978 
979 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
980   const VarDecl *OrigVD = nullptr;
981   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
982     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
984       Base = TempOASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
990     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
992       Base = TempASE->getBase()->IgnoreParenImpCasts();
993     DE = cast<DeclRefExpr>(Base);
994     OrigVD = cast<VarDecl>(DE->getDecl());
995   }
996   return OrigVD;
997 }
998 
999 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1000                                                Address PrivateAddr) {
1001   const DeclRefExpr *DE;
1002   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1003     BaseDecls.emplace_back(OrigVD);
1004     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1005     LValue BaseLValue =
1006         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1007                     OriginalBaseLValue);
1008     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1009     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1010         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1011         SharedAddr.getPointer());
1012     llvm::Value *PrivatePointer =
1013         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1014             PrivateAddr.getPointer(), SharedAddr.getType());
1015     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1016         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1017     return castToBase(CGF, OrigVD->getType(),
1018                       SharedAddresses[N].first.getType(),
1019                       OriginalBaseLValue.getAddress(CGF).getType(),
1020                       OriginalBaseLValue.getAlignment(), Ptr);
1021   }
1022   BaseDecls.emplace_back(
1023       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1024   return PrivateAddr;
1025 }
1026 
1027 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1028   const OMPDeclareReductionDecl *DRD =
1029       getReductionInit(ClausesData[N].ReductionOp);
1030   return DRD && DRD->getInitializer();
1031 }
1032 
1033 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1034   return CGF.EmitLoadOfPointerLValue(
1035       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1036       getThreadIDVariable()->getType()->castAs<PointerType>());
1037 }
1038 
// Emit the body of an OpenMP region inside a terminate scope: any exception
// escaping the structured block terminates the program, per the OpenMP
// structured-block rules quoted below.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1053 
1054 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1055     CodeGenFunction &CGF) {
1056   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1057                             getThreadIDVariable()->getType(),
1058                             AlignmentSource::Decl);
1059 }
1060 
1061 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1062                                        QualType FieldTy) {
1063   auto *Field = FieldDecl::Create(
1064       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1065       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1066       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1067   Field->setAccess(AS_public);
1068   DC->addDecl(Field);
1069   return Field;
1070 }
1071 
// Construct the OpenMP runtime support object: set up the critical-name type,
// initialize the OpenMPIRBuilder's cached types, and load any offloading
// metadata from the host IR (for device compilation).
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is an array of 8 x i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1082 
1083 void CGOpenMPRuntime::clear() {
1084   InternalVars.clear();
1085   // Clean non-target variable declarations possibly used only in debug info.
1086   for (const auto &Data : EmittedNonTargetVariables) {
1087     if (!Data.getValue().pointsToAliveValue())
1088       continue;
1089     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1090     if (!GV)
1091       continue;
1092     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1093       continue;
1094     GV->eraseFromParent();
1095   }
1096 }
1097 
1098 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1099   SmallString<128> Buffer;
1100   llvm::raw_svector_ostream OS(Buffer);
1101   StringRef Sep = FirstSeparator;
1102   for (StringRef Part : Parts) {
1103     OS << Sep << Part;
1104     Sep = Separator;
1105   }
1106   return std::string(OS.str());
1107 }
1108 
// Emit the outlined combiner or initializer function for a declare-reduction:
//   void .omp_combiner./.omp_initializer.(Ty *omp_out, Ty *omp_in)
// The body privatizes In/Out to the pointees of the two parameters and then
// emits CombinerInitializer (if any). For initializers without a call-style
// init, Out's own initializer is emitted into the out parameter instead.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Non-call initializers: emit Out's own non-trivial initializer first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1165 
// Emit (once) the combiner and optional initializer functions for a
// declare-reduction and cache them in UDRMap. When called during emission of
// a function (CGF non-null) the UDR is also recorded per-function so its
// cache entry can be invalidated with the function.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression; direct-init is
    // handled inside emitCombinerOrInitializer via Out's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1191 
1192 std::pair<llvm::Function *, llvm::Function *>
1193 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1194   auto I = UDRMap.find(D);
1195   if (I != UDRMap.end())
1196     return I->second;
1197   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1198   return UDRMap.lookup(D);
1199 }
1200 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for the given directive kind onto the
  // OpenMPIRBuilder's stack (if a builder is in use); the destructor pops it.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    // NOTE(review): FiniCB captures CGF by reference; the RAII object must
    // not outlive the CodeGenFunction it was constructed with.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pop the callback pushed in the constructor (if any was pushed).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1245 
// Outline the body of a 'parallel'/'teams' region into a function taking the
// thread id as its first parameter. HasCancel is derived from the concrete
// directive so cancellation barriers are emitted where needed.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether this directive kind may contain a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1282 
1283 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1284     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1285     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1286   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1287   return emitParallelOrTeamsOutlinedFunction(
1288       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1289 }
1290 
1291 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1292     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1294   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1295   return emitParallelOrTeamsOutlinedFunction(
1296       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1297 }
1298 
// Outline the body of a task/taskloop region. For untied tasks an extra
// action re-enqueues the task via __kmpc_omp_task at each scheduling point,
// and the number of generated task parts is reported through NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen emitted at untied-task scheduling points: call
  // __kmpc_omp_task(loc, tid, task_t*) to requeue the task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether this task directive kind may contain 'cancel'.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Part count is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1345 
1346 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1347                              const RecordDecl *RD, const CGRecordLayout &RL,
1348                              ArrayRef<llvm::Constant *> Data) {
1349   llvm::StructType *StructTy = RL.getLLVMType();
1350   unsigned PrevIdx = 0;
1351   ConstantInitBuilder CIBuilder(CGM);
1352   auto DI = Data.begin();
1353   for (const FieldDecl *FD : RD->fields()) {
1354     unsigned Idx = RL.getLLVMFieldNo(FD);
1355     // Fill the alignment.
1356     for (unsigned I = PrevIdx; I < Idx; ++I)
1357       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1358     PrevIdx = Idx + 1;
1359     Fields.add(*DI);
1360     ++DI;
1361   }
1362 }
1363 
1364 template <class... As>
1365 static llvm::GlobalVariable *
1366 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1367                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1368                    As &&... Args) {
1369   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1370   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1371   ConstantInitBuilder CIBuilder(CGM);
1372   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1373   buildStructValue(Fields, CGM, RD, RL, Data);
1374   return Fields.finishAndCreateGlobal(
1375       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1376       std::forward<As>(Args)...);
1377 }
1378 
1379 template <typename T>
1380 static void
1381 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1382                                          ArrayRef<llvm::Constant *> Data,
1383                                          T &Parent) {
1384   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1385   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1386   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1387   buildStructValue(Fields, CGM, RD, RL, Data);
1388   Fields.finishAndAddTo(Parent);
1389 }
1390 
// Create the insertion-point marker for the current function's ident/thread-id
// service calls. The marker is a no-op bitcast of undef that can later be
// located (to insert calls before it) and erased.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Place the marker at the builder's current insertion block.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Default: right after the function's alloca insertion point.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1406 
1407 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1408   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1409   if (Elem.second.ServiceInsertPt) {
1410     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1411     Elem.second.ServiceInsertPt = nullptr;
1412     Ptr->eraseFromParent();
1413   }
1414 }
1415 
// Render Loc as the ";file;function;line;column;;" string used for ident_t
// debug locations. The returned StringRef points into the caller's Buffer.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  // The function segment is empty when not inside a function declaration.
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1428 
1429 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1430                                                  SourceLocation Loc,
1431                                                  unsigned Flags) {
1432   uint32_t SrcLocStrSize;
1433   llvm::Constant *SrcLocStr;
1434   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1435       Loc.isInvalid()) {
1436     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1437   } else {
1438     std::string FunctionName;
1439     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1440       FunctionName = FD->getQualifiedNameAsString();
1441     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1442     const char *FileName = PLoc.getFilename();
1443     unsigned Line = PLoc.getLine();
1444     unsigned Column = PLoc.getColumn();
1445     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1446                                                 Column, SrcLocStrSize);
1447   }
1448   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1449   return OMPBuilder.getOrCreateIdent(
1450       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1451 }
1452 
/// Return the OpenMP global thread id (gtid) value for the current function.
/// The value is computed at most once per function where possible: it is
/// either loaded from the thread-id argument of an outlined region, or
/// produced by a __kmpc_global_thread_num call emitted at the service
/// insertion point, and then cached in OpenMPLocThreadIDMap.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the thread-id argument when it is safe to do so: no C++
      // exceptions / landing pads, or the variable's storage and/or the
      // current insertion point are in the entry block.
      // NOTE(review): the disjunction below encodes a dominance argument --
      // confirm before relaxing any of the conditions.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (function entry)
  // so every later use of the cached value is dominated by it.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1521 
1522 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1523   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1524   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1525     clearLocThreadIdInsertPt(CGF);
1526     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1527   }
1528   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1529     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1530       UDRMap.erase(D);
1531     FunctionUDRMap.erase(CGF.CurFn);
1532   }
1533   auto I = FunctionUDMMap.find(CGF.CurFn);
1534   if (I != FunctionUDMMap.end()) {
1535     for(const auto *D : I->second)
1536       UDMMap.erase(D);
1537     FunctionUDMMap.erase(I);
1538   }
1539   LastprivateConditionalToTypes.erase(CGF.CurFn);
1540   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1541 }
1542 
/// Return the LLVM pointer type for the runtime's ident_t source-location
/// descriptor, as maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1546 
/// Return the pointer type of the kmpc_micro outlined-function signature:
/// void (*)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...). The function
/// type is built lazily and cached in Kmpc_MicroTy.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1556 
1557 llvm::FunctionCallee
1558 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1559                                              bool IsGPUDistribute) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name;
1563   if (IsGPUDistribute)
1564     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1565                                     : "__kmpc_distribute_static_init_4u")
1566                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1567                                     : "__kmpc_distribute_static_init_8u");
1568   else
1569     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1570                                     : "__kmpc_for_static_init_4u")
1571                         : (IVSigned ? "__kmpc_for_static_init_8"
1572                                     : "__kmpc_for_static_init_8u");
1573 
1574   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1575   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1576   llvm::Type *TypeParams[] = {
1577     getIdentTyPointerTy(),                     // loc
1578     CGM.Int32Ty,                               // tid
1579     CGM.Int32Ty,                               // schedtype
1580     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1581     PtrTy,                                     // p_lower
1582     PtrTy,                                     // p_upper
1583     PtrTy,                                     // p_stride
1584     ITy,                                       // incr
1585     ITy                                        // chunk
1586   };
1587   auto *FnTy =
1588       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1589   return CGM.CreateRuntimeFunction(FnTy, Name);
1590 }
1591 
1592 llvm::FunctionCallee
1593 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1594   assert((IVSize == 32 || IVSize == 64) &&
1595          "IV size is not compatible with the omp runtime");
1596   StringRef Name =
1597       IVSize == 32
1598           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1599           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1600   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1601   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1602                                CGM.Int32Ty,           // tid
1603                                CGM.Int32Ty,           // schedtype
1604                                ITy,                   // lower
1605                                ITy,                   // upper
1606                                ITy,                   // stride
1607                                ITy                    // chunk
1608   };
1609   auto *FnTy =
1610       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1611   return CGM.CreateRuntimeFunction(FnTy, Name);
1612 }
1613 
1614 llvm::FunctionCallee
1615 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1616   assert((IVSize == 32 || IVSize == 64) &&
1617          "IV size is not compatible with the omp runtime");
1618   StringRef Name =
1619       IVSize == 32
1620           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1621           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1622   llvm::Type *TypeParams[] = {
1623       getIdentTyPointerTy(), // loc
1624       CGM.Int32Ty,           // tid
1625   };
1626   auto *FnTy =
1627       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1628   return CGM.CreateRuntimeFunction(FnTy, Name);
1629 }
1630 
1631 llvm::FunctionCallee
1632 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1633   assert((IVSize == 32 || IVSize == 64) &&
1634          "IV size is not compatible with the omp runtime");
1635   StringRef Name =
1636       IVSize == 32
1637           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1638           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1639   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1640   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1641   llvm::Type *TypeParams[] = {
1642     getIdentTyPointerTy(),                     // loc
1643     CGM.Int32Ty,                               // tid
1644     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1645     PtrTy,                                     // p_lower
1646     PtrTy,                                     // p_upper
1647     PtrTy                                      // p_stride
1648   };
1649   auto *FnTy =
1650       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1651   return CGM.CreateRuntimeFunction(FnTy, Name);
1652 }
1653 
1654 /// Obtain information that uniquely identifies a target entry. This
1655 /// consists of the file and device IDs as well as line number associated with
1656 /// the relevant entry source location.
1657 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1658                                      unsigned &DeviceID, unsigned &FileID,
1659                                      unsigned &LineNum) {
1660   SourceManager &SM = C.getSourceManager();
1661 
1662   // The loc should be always valid and have a file ID (the user cannot use
1663   // #pragma directives in macros)
1664 
1665   assert(Loc.isValid() && "Source location is expected to be always valid.");
1666 
1667   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1668   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1669 
1670   llvm::sys::fs::UniqueID ID;
1671   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1672     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1673     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1675       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1676           << PLoc.getFilename() << EC.message();
1677   }
1678 
1679   DeviceID = ID.getDevice();
1680   FileID = ID.getFile();
1681   LineNum = PLoc.getLine();
1682 }
1683 
/// Return the address of the "_decl_tgt_ref_ptr" reference pointer used to
/// indirect accesses to a declare-target 'link' variable (or a 'to' variable
/// under 'requires unified_shared_memory'). Returns an invalid Address when
/// no indirection is needed for \p VD.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no device code is generated, so no indirection is
  // required.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // For internal-linkage variables, append the file ID so names coming
      // from different translation units do not collide.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // Lazily create the pointer global; on the host it is initialized with
      // the address of the original variable.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      // Record the variable so the offload machinery emits the matching
      // entries.
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1722 
1723 llvm::Constant *
1724 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1725   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1726          !CGM.getContext().getTargetInfo().isTLSSupported());
1727   // Lookup the entry, lazily creating it if necessary.
1728   std::string Suffix = getName({"cache", ""});
1729   return getOrCreateInternalVariable(
1730       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1731 }
1732 
1733 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1734                                                 const VarDecl *VD,
1735                                                 Address VDAddr,
1736                                                 SourceLocation Loc) {
1737   if (CGM.getLangOpts().OpenMPUseTLS &&
1738       CGM.getContext().getTargetInfo().isTLSSupported())
1739     return VDAddr;
1740 
1741   llvm::Type *VarTy = VDAddr.getElementType();
1742   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1743                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1744                                                        CGM.Int8PtrTy),
1745                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1746                          getOrCreateThreadPrivateCache(VD)};
1747   return Address(CGF.EmitRuntimeCall(
1748                      OMPBuilder.getOrCreateRuntimeFunction(
1749                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1750                      Args),
1751                  VDAddr.getAlignment());
1752 }
1753 
1754 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1755     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1756     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1757   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1758   // library.
1759   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1760   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1761                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1762                       OMPLoc);
1763   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1764   // to register constructor/destructor for variable.
1765   llvm::Value *Args[] = {
1766       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1767       Ctor, CopyCtor, Dtor};
1768   CGF.EmitRuntimeCall(
1769       OMPBuilder.getOrCreateRuntimeFunction(
1770           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1771       Args);
1772 }
1773 
/// Emit the initializer/destructor registration for a threadprivate variable.
/// \param VD the threadprivate variable; only its definition is processed.
/// \param VDAddr address of the original (master) copy of the variable.
/// \param Loc location to attach to the generated helpers.
/// \param PerformInit whether the declaration's initializer must be run.
/// \param CGF if non-null, the registration call is emitted into this
///        function; otherwise a dedicated "__omp_threadprivate_init_"
///        function is created and returned.
/// \return the generated init function, or nullptr when none is needed
///         (TLS handles the variable, or no ctor/dtor is required).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Process each definition at most once (keyed by mangled name).
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // Signature: void *ctor(void *dst) -- receives the address of the
      // per-thread copy and returns it.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer on the per-thread copy.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer, as the runtime contract requires.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      // Signature: void dtor(void *dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The runtime accepts null for the ctor/dtor slots it does not need.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: emit a standalone global init function that
      // performs the registration and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1893 
/// Emit the offload-entry ctor/dtor helpers for a declare-target variable
/// definition.
/// \param VD the declare-target variable; only its definition is processed.
/// \param Addr the global backing the variable.
/// \param PerformInit whether the declaration's initializer must be run.
/// \return true on the device (where the caller must not emit the host-style
///         initialization), false on the host.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no offloading targets are configured.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // 'link' variables (and 'to' under unified shared memory) are accessed
  // through a reference pointer instead; no ctor/dtor entries here.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each definition at most once (keyed by mangled name).
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      // NOTE(review): Init is used unconditionally below -- presumably
      // PerformInit implies a non-null initializer; confirm with callers.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive: nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder is needed to anchor the offload entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive: nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder anchoring the offload entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2008 
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable identified by \p Name. With TLS support the backing global is
/// simply marked thread_local; otherwise the per-thread copy is fetched via
/// __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // Backing global, shared by all threads; named "<Name><artificial-suffix>".
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // TLS path: the global itself becomes the per-thread storage.
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime path: __kmpc_threadprivate_cached(loc, tid, &var, size, &cache).
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the runtime's void* result back to a pointer to the variable type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2040 
2041 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2042                                    const RegionCodeGenTy &ThenGen,
2043                                    const RegionCodeGenTy &ElseGen) {
2044   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2045 
2046   // If the condition constant folds and can be elided, try to avoid emitting
2047   // the condition and the dead arm of the if/else.
2048   bool CondConstant;
2049   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2050     if (CondConstant)
2051       ThenGen(CGF);
2052     else
2053       ElseGen(CGF);
2054     return;
2055   }
2056 
2057   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2058   // emit the conditional branch.
2059   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2060   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2061   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2062   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2063 
2064   // Emit the 'then' code.
2065   CGF.EmitBlock(ThenBlock);
2066   ThenGen(CGF);
2067   CGF.EmitBranch(ContBlock);
2068   // Emit the 'else' code if present.
2069   // There is no need to emit line number for unconditional branch.
2070   (void)ApplyDebugLocation::CreateEmpty(CGF);
2071   CGF.EmitBlock(ElseBlock);
2072   ElseGen(CGF);
2073   // There is no need to emit line number for unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBranch(ContBlock);
2076   // Emit the continuation block for code after the if.
2077   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2078 }
2079 
/// Emit the call site for a parallel region: either fork worker threads via
/// __kmpc_fork_call, or — when the if-clause condition is false — run the
/// outlined function serialized on the encountering thread, bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
/// \param OutlinedFn the outlined body of the parallel region.
/// \param CapturedVars captured values forwarded as trailing arguments of
///        the outlined function.
/// \param IfCond condition of an if-clause, or null for an unconditional
///        parallel region.
/// \param NumThreads not referenced in this body.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Code emitted when the region executes in parallel (if-clause true or
  // absent).
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Code emitted when the if-clause is false: run the region serialized on
  // the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if-clause, branch between the two code paths; otherwise emit the
  // parallel path unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2151 
2152 // If we're inside an (outlined) parallel region, use the region info's
2153 // thread-ID variable (it is passed in a first argument of the outlined function
2154 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2155 // regular serial code region, get thread ID by calling kmp_int32
2156 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2157 // return the address of that temp.
2158 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2159                                              SourceLocation Loc) {
2160   if (auto *OMPRegionInfo =
2161           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2162     if (OMPRegionInfo->getThreadIDVariable())
2163       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2164 
2165   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2166   QualType Int32Ty =
2167       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2168   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2169   CGF.EmitStoreOfScalar(ThreadID,
2170                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2171 
2172   return ThreadIDTemp;
2173 }
2174 
2175 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2176     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2177   SmallString<256> Buffer;
2178   llvm::raw_svector_ostream Out(Buffer);
2179   Out << Name;
2180   StringRef RuntimeName = Out.str();
2181   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2182   if (Elem.second) {
2183     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2184            "OMP internal variable has different type than requested");
2185     return &*Elem.second;
2186   }
2187 
2188   return Elem.second = new llvm::GlobalVariable(
2189              CGM.getModule(), Ty, /*IsConstant*/ false,
2190              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2191              Elem.first(), /*InsertBefore=*/nullptr,
2192              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2193 }
2194 
2195 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2196   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2197   std::string Name = getName({Prefix, "var"});
2198   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2199 }
2200 
2201 namespace {
2202 /// Common pre(post)-action for different OpenMP constructs.
2203 class CommonActionTy final : public PrePostActionTy {
2204   llvm::FunctionCallee EnterCallee;
2205   ArrayRef<llvm::Value *> EnterArgs;
2206   llvm::FunctionCallee ExitCallee;
2207   ArrayRef<llvm::Value *> ExitArgs;
2208   bool Conditional;
2209   llvm::BasicBlock *ContBlock = nullptr;
2210 
2211 public:
2212   CommonActionTy(llvm::FunctionCallee EnterCallee,
2213                  ArrayRef<llvm::Value *> EnterArgs,
2214                  llvm::FunctionCallee ExitCallee,
2215                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2216       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2217         ExitArgs(ExitArgs), Conditional(Conditional) {}
2218   void Enter(CodeGenFunction &CGF) override {
2219     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2220     if (Conditional) {
2221       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2222       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2223       ContBlock = CGF.createBasicBlock("omp_if.end");
2224       // Generate the branch (If-stmt)
2225       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2226       CGF.EmitBlock(ThenBlock);
2227     }
2228   }
2229   void Done(CodeGenFunction &CGF) {
2230     // Emit the rest of blocks/branches
2231     CGF.EmitBranch(ContBlock);
2232     CGF.EmitBlock(ContBlock, true);
2233   }
2234   void Exit(CodeGenFunction &CGF) override {
2235     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2236   }
2237 };
2238 } // anonymous namespace
2239 
2240 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2241                                          StringRef CriticalName,
2242                                          const RegionCodeGenTy &CriticalOpGen,
2243                                          SourceLocation Loc, const Expr *Hint) {
2244   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2245   // CriticalOpGen();
2246   // __kmpc_end_critical(ident_t *, gtid, Lock);
2247   // Prepare arguments and build a call to __kmpc_critical
2248   if (!CGF.HaveInsertPoint())
2249     return;
2250   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2251                          getCriticalRegionLock(CriticalName)};
2252   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2253                                                 std::end(Args));
2254   if (Hint) {
2255     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2256         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2257   }
2258   CommonActionTy Action(
2259       OMPBuilder.getOrCreateRuntimeFunction(
2260           CGM.getModule(),
2261           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2262       EnterArgs,
2263       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2264                                             OMPRTL___kmpc_end_critical),
2265       Args);
2266   CriticalOpGen.setAction(Action);
2267   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2268 }
2269 
2270 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2271                                        const RegionCodeGenTy &MasterOpGen,
2272                                        SourceLocation Loc) {
2273   if (!CGF.HaveInsertPoint())
2274     return;
2275   // if(__kmpc_master(ident_t *, gtid)) {
2276   //   MasterOpGen();
2277   //   __kmpc_end_master(ident_t *, gtid);
2278   // }
2279   // Prepare arguments and build a call to __kmpc_master
2280   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2281   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2282                             CGM.getModule(), OMPRTL___kmpc_master),
2283                         Args,
2284                         OMPBuilder.getOrCreateRuntimeFunction(
2285                             CGM.getModule(), OMPRTL___kmpc_end_master),
2286                         Args,
2287                         /*Conditional=*/true);
2288   MasterOpGen.setAction(Action);
2289   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2290   Action.Done(CGF);
2291 }
2292 
2293 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2294                                        const RegionCodeGenTy &MaskedOpGen,
2295                                        SourceLocation Loc, const Expr *Filter) {
2296   if (!CGF.HaveInsertPoint())
2297     return;
2298   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2299   //   MaskedOpGen();
2300   //   __kmpc_end_masked(iden_t *, gtid);
2301   // }
2302   // Prepare arguments and build a call to __kmpc_masked
2303   llvm::Value *FilterVal = Filter
2304                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2305                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2306   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2307                          FilterVal};
2308   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2309                             getThreadID(CGF, Loc)};
2310   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2311                             CGM.getModule(), OMPRTL___kmpc_masked),
2312                         Args,
2313                         OMPBuilder.getOrCreateRuntimeFunction(
2314                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2315                         ArgsEnd,
2316                         /*Conditional=*/true);
2317   MaskedOpGen.setAction(Action);
2318   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2319   Action.Done(CGF);
2320 }
2321 
2322 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2323                                         SourceLocation Loc) {
2324   if (!CGF.HaveInsertPoint())
2325     return;
2326   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2327     OMPBuilder.createTaskyield(CGF.Builder);
2328   } else {
2329     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2330     llvm::Value *Args[] = {
2331         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2332         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2333     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2334                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2335                         Args);
2336   }
2337 
2338   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2339     Region->emitUntiedSwitch(CGF);
2340 }
2341 
2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2343                                           const RegionCodeGenTy &TaskgroupOpGen,
2344                                           SourceLocation Loc) {
2345   if (!CGF.HaveInsertPoint())
2346     return;
2347   // __kmpc_taskgroup(ident_t *, gtid);
2348   // TaskgroupOpGen();
2349   // __kmpc_end_taskgroup(ident_t *, gtid);
2350   // Prepare arguments and build a call to __kmpc_taskgroup
2351   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2352   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2353                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2354                         Args,
2355                         OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2357                         Args);
2358   TaskgroupOpGen.setAction(Action);
2359   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2360 }
2361 
2362 /// Given an array of pointers to variables, project the address of a
2363 /// given variable.
2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365                                       unsigned Index, const VarDecl *Var) {
2366   // Pull out the pointer to the variable.
2367   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369 
2370   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2371   Addr = CGF.Builder.CreateElementBitCast(
2372       Addr, CGF.ConvertTypeForMem(Var->getType()));
2373   return Addr;
2374 }
2375 
/// Emit the helper function passed to __kmpc_copyprivate that copies every
/// copyprivate variable from the single-executing thread to the others.
/// It has signature void(void *LHSArg, void *RHSArg), where both arguments
/// are arrays of void* — one pointer per copyprivate variable — and each
/// element pair is copied with the corresponding assignment expression.
/// \param ArgsType pointer type of the void*[n] array both arguments carry.
/// \param DestExprs/SrcExprs DeclRefExprs naming the destination/source
///        placeholder variables of each assignment.
/// \param AssignmentOps the assignment expressions to emit per variable.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the helper is only referenced by the copyprivate call.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper's body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Emit the copy using the variable's own type and assignment expression.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2429 
/// Emit a 'single' region, optionally followed by a __kmpc_copyprivate call
/// that broadcasts the listed variables from the executing thread to the
/// rest of the team. The did_it flag (only created when copyprivate is
/// present) records whether this thread was the one that ran the region.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1; -- still inside the conditional region, so only the thread
    // that executed the single body sets the flag.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region opened by the action's Enter().
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill the list: element I holds the address of variable I as a void*.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2517 
2518 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2519                                         const RegionCodeGenTy &OrderedOpGen,
2520                                         SourceLocation Loc, bool IsThreads) {
2521   if (!CGF.HaveInsertPoint())
2522     return;
2523   // __kmpc_ordered(ident_t *, gtid);
2524   // OrderedOpGen();
2525   // __kmpc_end_ordered(ident_t *, gtid);
2526   // Prepare arguments and build a call to __kmpc_ordered
2527   if (IsThreads) {
2528     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2529     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2530                               CGM.getModule(), OMPRTL___kmpc_ordered),
2531                           Args,
2532                           OMPBuilder.getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2534                           Args);
2535     OrderedOpGen.setAction(Action);
2536     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2537     return;
2538   }
2539   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540 }
2541 
2542 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2543   unsigned Flags;
2544   if (Kind == OMPD_for)
2545     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2546   else if (Kind == OMPD_sections)
2547     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2548   else if (Kind == OMPD_single)
2549     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2550   else if (Kind == OMPD_barrier)
2551     Flags = OMP_IDENT_BARRIER_EXPL;
2552   else
2553     Flags = OMP_IDENT_BARRIER_IMPL;
2554   return Flags;
2555 }
2556 
2557 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2558     CodeGenFunction &CGF, const OMPLoopDirective &S,
2559     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2560   // Check if the loop directive is actually a doacross loop directive. In this
2561   // case choose static, 1 schedule.
2562   if (llvm::any_of(
2563           S.getClausesOfKind<OMPOrderedClause>(),
2564           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2565     ScheduleKind = OMPC_SCHEDULE_static;
2566     // Chunk size is 1 in this case.
2567     llvm::APInt ChunkSize(32, 1);
2568     ChunkExpr = IntegerLiteral::Create(
2569         CGF.getContext(), ChunkSize,
2570         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2571         SourceLocation());
2572   }
2573 }
2574 
/// Emit a barrier at \p Loc. With the OpenMPIRBuilder enabled, emission is
/// delegated to it. Otherwise this emits __kmpc_barrier, or — inside a
/// cancellable region when \p ForceSimpleCall is false —
/// __kmpc_cancel_barrier, optionally followed by a check that branches to
/// the construct's cancellation exit when the barrier reports cancellation.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region, use the cancellation-aware barrier.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2624 
2625 /// Map the OpenMP loop schedule to the runtime enumeration.
2626 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2627                                           bool Chunked, bool Ordered) {
2628   switch (ScheduleKind) {
2629   case OMPC_SCHEDULE_static:
2630     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2631                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2632   case OMPC_SCHEDULE_dynamic:
2633     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2634   case OMPC_SCHEDULE_guided:
2635     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2636   case OMPC_SCHEDULE_runtime:
2637     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2638   case OMPC_SCHEDULE_auto:
2639     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2640   case OMPC_SCHEDULE_unknown:
2641     assert(!Chunked && "chunk was specified but schedule kind not known");
2642     return Ordered ? OMP_ord_static : OMP_sch_static;
2643   }
2644   llvm_unreachable("Unexpected runtime schedule");
2645 }
2646 
2647 /// Map the OpenMP distribute schedule to the runtime enumeration.
2648 static OpenMPSchedType
2649 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2650   // only static is allowed for dist_schedule
2651   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2652 }
2653 
2654 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2655                                          bool Chunked) const {
2656   OpenMPSchedType Schedule =
2657       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2658   return Schedule == OMP_sch_static;
2659 }
2660 
2661 bool CGOpenMPRuntime::isStaticNonchunked(
2662     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2663   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2664   return Schedule == OMP_dist_sch_static;
2665 }
2666 
2667 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2668                                       bool Chunked) const {
2669   OpenMPSchedType Schedule =
2670       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2671   return Schedule == OMP_sch_static_chunked;
2672 }
2673 
2674 bool CGOpenMPRuntime::isStaticChunked(
2675     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2676   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2677   return Schedule == OMP_dist_sch_static_chunked;
2678 }
2679 
2680 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2681   OpenMPSchedType Schedule =
2682       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2683   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2684   return Schedule != OMP_sch_static;
2685 }
2686 
2687 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2688                                   OpenMPScheduleClauseModifier M1,
2689                                   OpenMPScheduleClauseModifier M2) {
2690   int Modifier = 0;
2691   switch (M1) {
2692   case OMPC_SCHEDULE_MODIFIER_monotonic:
2693     Modifier = OMP_sch_modifier_monotonic;
2694     break;
2695   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2696     Modifier = OMP_sch_modifier_nonmonotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_simd:
2699     if (Schedule == OMP_sch_static_chunked)
2700       Schedule = OMP_sch_static_balanced_chunked;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_last:
2703   case OMPC_SCHEDULE_MODIFIER_unknown:
2704     break;
2705   }
2706   switch (M2) {
2707   case OMPC_SCHEDULE_MODIFIER_monotonic:
2708     Modifier = OMP_sch_modifier_monotonic;
2709     break;
2710   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2711     Modifier = OMP_sch_modifier_nonmonotonic;
2712     break;
2713   case OMPC_SCHEDULE_MODIFIER_simd:
2714     if (Schedule == OMP_sch_static_chunked)
2715       Schedule = OMP_sch_static_balanced_chunked;
2716     break;
2717   case OMPC_SCHEDULE_MODIFIER_last:
2718   case OMPC_SCHEDULE_MODIFIER_unknown:
2719     break;
2720   }
2721   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2722   // If the static schedule kind is specified or if the ordered clause is
2723   // specified, and if the nonmonotonic modifier is not specified, the effect is
2724   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2725   // modifier is specified, the effect is as if the nonmonotonic modifier is
2726   // specified.
2727   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2728     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2729           Schedule == OMP_sch_static_balanced_chunked ||
2730           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2731           Schedule == OMP_dist_sch_static_chunked ||
2732           Schedule == OMP_dist_sch_static))
2733       Modifier = OMP_sch_modifier_nonmonotonic;
2734   }
2735   return Schedule | Modifier;
2736 }
2737 
2738 void CGOpenMPRuntime::emitForDispatchInit(
2739     CodeGenFunction &CGF, SourceLocation Loc,
2740     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2741     bool Ordered, const DispatchRTInput &DispatchValues) {
2742   if (!CGF.HaveInsertPoint())
2743     return;
2744   OpenMPSchedType Schedule = getRuntimeSchedule(
2745       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2746   assert(Ordered ||
2747          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2748           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2749           Schedule != OMP_sch_static_balanced_chunked));
2750   // Call __kmpc_dispatch_init(
2751   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2752   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2753   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2754 
2755   // If the Chunk was not specified in the clause - use default value 1.
2756   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2757                                             : CGF.Builder.getIntN(IVSize, 1);
2758   llvm::Value *Args[] = {
2759       emitUpdateLocation(CGF, Loc),
2760       getThreadID(CGF, Loc),
2761       CGF.Builder.getInt32(addMonoNonMonoModifier(
2762           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2763       DispatchValues.LB,                                     // Lower
2764       DispatchValues.UB,                                     // Upper
2765       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2766       Chunk                                                  // Chunk
2767   };
2768   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2769 }
2770 
2771 static void emitForStaticInitCall(
2772     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2773     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2774     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2775     const CGOpenMPRuntime::StaticRTInput &Values) {
2776   if (!CGF.HaveInsertPoint())
2777     return;
2778 
2779   assert(!Values.Ordered);
2780   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2781          Schedule == OMP_sch_static_balanced_chunked ||
2782          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2783          Schedule == OMP_dist_sch_static ||
2784          Schedule == OMP_dist_sch_static_chunked);
2785 
2786   // Call __kmpc_for_static_init(
2787   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2788   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2789   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2790   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2791   llvm::Value *Chunk = Values.Chunk;
2792   if (Chunk == nullptr) {
2793     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2794             Schedule == OMP_dist_sch_static) &&
2795            "expected static non-chunked schedule");
2796     // If the Chunk was not specified in the clause - use default value 1.
2797     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2798   } else {
2799     assert((Schedule == OMP_sch_static_chunked ||
2800             Schedule == OMP_sch_static_balanced_chunked ||
2801             Schedule == OMP_ord_static_chunked ||
2802             Schedule == OMP_dist_sch_static_chunked) &&
2803            "expected static chunked schedule");
2804   }
2805   llvm::Value *Args[] = {
2806       UpdateLocation,
2807       ThreadId,
2808       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2809                                                   M2)), // Schedule type
2810       Values.IL.getPointer(),                           // &isLastIter
2811       Values.LB.getPointer(),                           // &LB
2812       Values.UB.getPointer(),                           // &UB
2813       Values.ST.getPointer(),                           // &Stride
2814       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2815       Chunk                                             // Chunk
2816   };
2817   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2818 }
2819 
2820 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2821                                         SourceLocation Loc,
2822                                         OpenMPDirectiveKind DKind,
2823                                         const OpenMPScheduleTy &ScheduleKind,
2824                                         const StaticRTInput &Values) {
2825   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2826       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2827   assert(isOpenMPWorksharingDirective(DKind) &&
2828          "Expected loop-based or sections-based directive.");
2829   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2830                                              isOpenMPLoopDirective(DKind)
2831                                                  ? OMP_IDENT_WORK_LOOP
2832                                                  : OMP_IDENT_WORK_SECTIONS);
2833   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2834   llvm::FunctionCallee StaticInitFunction =
2835       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2836   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2837   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2838                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2839 }
2840 
2841 void CGOpenMPRuntime::emitDistributeStaticInit(
2842     CodeGenFunction &CGF, SourceLocation Loc,
2843     OpenMPDistScheduleClauseKind SchedKind,
2844     const CGOpenMPRuntime::StaticRTInput &Values) {
2845   OpenMPSchedType ScheduleNum =
2846       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2847   llvm::Value *UpdatedLocation =
2848       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2849   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2850   llvm::FunctionCallee StaticInitFunction;
2851   bool isGPUDistribute =
2852       CGM.getLangOpts().OpenMPIsDevice &&
2853       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2854   StaticInitFunction = createForStaticInitFunction(
2855       Values.IVSize, Values.IVSigned, isGPUDistribute);
2856 
2857   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2858                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2859                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2860 }
2861 
2862 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2863                                           SourceLocation Loc,
2864                                           OpenMPDirectiveKind DKind) {
2865   if (!CGF.HaveInsertPoint())
2866     return;
2867   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2868   llvm::Value *Args[] = {
2869       emitUpdateLocation(CGF, Loc,
2870                          isOpenMPDistributeDirective(DKind)
2871                              ? OMP_IDENT_WORK_DISTRIBUTE
2872                              : isOpenMPLoopDirective(DKind)
2873                                    ? OMP_IDENT_WORK_LOOP
2874                                    : OMP_IDENT_WORK_SECTIONS),
2875       getThreadID(CGF, Loc)};
2876   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2877   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2878       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2879     CGF.EmitRuntimeCall(
2880         OMPBuilder.getOrCreateRuntimeFunction(
2881             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2882         Args);
2883   else
2884     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2885                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2886                         Args);
2887 }
2888 
2889 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2890                                                  SourceLocation Loc,
2891                                                  unsigned IVSize,
2892                                                  bool IVSigned) {
2893   if (!CGF.HaveInsertPoint())
2894     return;
2895   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2896   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2897   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2898 }
2899 
2900 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2901                                           SourceLocation Loc, unsigned IVSize,
2902                                           bool IVSigned, Address IL,
2903                                           Address LB, Address UB,
2904                                           Address ST) {
2905   // Call __kmpc_dispatch_next(
2906   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2907   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2908   //          kmp_int[32|64] *p_stride);
2909   llvm::Value *Args[] = {
2910       emitUpdateLocation(CGF, Loc),
2911       getThreadID(CGF, Loc),
2912       IL.getPointer(), // &isLastIter
2913       LB.getPointer(), // &Lower
2914       UB.getPointer(), // &Upper
2915       ST.getPointer()  // &Stride
2916   };
2917   llvm::Value *Call =
2918       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2919   return CGF.EmitScalarConversion(
2920       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2921       CGF.getContext().BoolTy, Loc);
2922 }
2923 
2924 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2925                                            llvm::Value *NumThreads,
2926                                            SourceLocation Loc) {
2927   if (!CGF.HaveInsertPoint())
2928     return;
2929   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2930   llvm::Value *Args[] = {
2931       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2932       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2933   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2934                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2935                       Args);
2936 }
2937 
2938 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2939                                          ProcBindKind ProcBind,
2940                                          SourceLocation Loc) {
2941   if (!CGF.HaveInsertPoint())
2942     return;
2943   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2944   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2945   llvm::Value *Args[] = {
2946       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2947       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2948   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2949                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2950                       Args);
2951 }
2952 
2953 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2954                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2955   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2956     OMPBuilder.createFlush(CGF.Builder);
2957   } else {
2958     if (!CGF.HaveInsertPoint())
2959       return;
2960     // Build call void __kmpc_flush(ident_t *loc)
2961     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2962                             CGM.getModule(), OMPRTL___kmpc_flush),
2963                         emitUpdateLocation(CGF, Loc));
2964   }
2965 }
2966 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these ordinals are used as field indices, so the order here
/// presumably must match the kmp_task_t record layout built elsewhere in this
/// file — keep both in sync.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2992 
2993 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2994   return OffloadEntriesTargetRegion.empty() &&
2995          OffloadEntriesDeviceGlobalVar.empty();
2996 }
2997 
2998 /// Initialize target region entry.
2999 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3000     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3001                                     StringRef ParentName, unsigned LineNum,
3002                                     unsigned Order) {
3003   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3004                                              "only required for the device "
3005                                              "code generation.");
3006   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3007       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3008                                    OMPTargetRegionEntryTargetRegion);
3009   ++OffloadingEntriesNum;
3010 }
3011 
3012 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3013     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3014                                   StringRef ParentName, unsigned LineNum,
3015                                   llvm::Constant *Addr, llvm::Constant *ID,
3016                                   OMPTargetRegionEntryKind Flags) {
3017   // If we are emitting code for a target, the entry is already initialized,
3018   // only has to be registered.
3019   if (CGM.getLangOpts().OpenMPIsDevice) {
3020     // This could happen if the device compilation is invoked standalone.
3021     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3022       return;
3023     auto &Entry =
3024         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3025     Entry.setAddress(Addr);
3026     Entry.setID(ID);
3027     Entry.setFlags(Flags);
3028   } else {
3029     if (Flags ==
3030             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3031         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3032                                  /*IgnoreAddressId*/ true))
3033       return;
3034     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3035            "Target region entry already registered!");
3036     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3037     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3038     ++OffloadingEntriesNum;
3039   }
3040 }
3041 
3042 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3043     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3044     bool IgnoreAddressId) const {
3045   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3046   if (PerDevice == OffloadEntriesTargetRegion.end())
3047     return false;
3048   auto PerFile = PerDevice->second.find(FileID);
3049   if (PerFile == PerDevice->second.end())
3050     return false;
3051   auto PerParentName = PerFile->second.find(ParentName);
3052   if (PerParentName == PerFile->second.end())
3053     return false;
3054   auto PerLine = PerParentName->second.find(LineNum);
3055   if (PerLine == PerParentName->second.end())
3056     return false;
3057   // Fail if this entry is already registered.
3058   if (!IgnoreAddressId &&
3059       (PerLine->second.getAddress() || PerLine->second.getID()))
3060     return false;
3061   return true;
3062 }
3063 
3064 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3065     const OffloadTargetRegionEntryInfoActTy &Action) {
3066   // Scan all target region entries and perform the provided action.
3067   for (const auto &D : OffloadEntriesTargetRegion)
3068     for (const auto &F : D.second)
3069       for (const auto &P : F.second)
3070         for (const auto &L : P.second)
3071           Action(D.first, F.first, P.first(), L.first, L.second);
3072 }
3073 
3074 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3075     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3076                                        OMPTargetGlobalVarEntryKind Flags,
3077                                        unsigned Order) {
3078   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3079                                              "only required for the device "
3080                                              "code generation.");
3081   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3082   ++OffloadingEntriesNum;
3083 }
3084 
3085 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3086     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3087                                      CharUnits VarSize,
3088                                      OMPTargetGlobalVarEntryKind Flags,
3089                                      llvm::GlobalValue::LinkageTypes Linkage) {
3090   if (CGM.getLangOpts().OpenMPIsDevice) {
3091     // This could happen if the device compilation is invoked standalone.
3092     if (!hasDeviceGlobalVarEntryInfo(VarName))
3093       return;
3094     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3095     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3096       if (Entry.getVarSize().isZero()) {
3097         Entry.setVarSize(VarSize);
3098         Entry.setLinkage(Linkage);
3099       }
3100       return;
3101     }
3102     Entry.setVarSize(VarSize);
3103     Entry.setLinkage(Linkage);
3104     Entry.setAddress(Addr);
3105   } else {
3106     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3107       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3108       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3109              "Entry not initialized!");
3110       if (Entry.getVarSize().isZero()) {
3111         Entry.setVarSize(VarSize);
3112         Entry.setLinkage(Linkage);
3113       }
3114       return;
3115     }
3116     OffloadEntriesDeviceGlobalVar.try_emplace(
3117         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3118     ++OffloadingEntriesNum;
3119   }
3120 }
3121 
3122 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3123     actOnDeviceGlobalVarEntriesInfo(
3124         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3125   // Scan all target region entries and perform the provided action.
3126   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3127     Action(E.getKey(), E.getValue());
3128 }
3129 
3130 void CGOpenMPRuntime::createOffloadEntry(
3131     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3132     llvm::GlobalValue::LinkageTypes Linkage) {
3133   StringRef Name = Addr->getName();
3134   llvm::Module &M = CGM.getModule();
3135   llvm::LLVMContext &C = M.getContext();
3136 
3137   // Create constant string with the name.
3138   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3139 
3140   std::string StringName = getName({"omp_offloading", "entry_name"});
3141   auto *Str = new llvm::GlobalVariable(
3142       M, StrPtrInit->getType(), /*isConstant=*/true,
3143       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3144   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3145 
3146   llvm::Constant *Data[] = {
3147       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3148       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3149       llvm::ConstantInt::get(CGM.SizeTy, Size),
3150       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3151       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3152   std::string EntryName = getName({"omp_offloading", "entry", ""});
3153   llvm::GlobalVariable *Entry = createGlobalStruct(
3154       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3155       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3156 
3157   // The entry has to be created in the section the linker expects it to be.
3158   Entry->setSection("omp_offloading_entries");
3159 }
3160 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by creation order; each tuple holds the entry, the source
  // location it was matched to, and the parent function / variable name.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Try to recover the source location of the entry by matching the
        // device/file IDs against the files known to the source manager (used
        // only for diagnostics below).
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual offload entry globals (host side) and diagnose entries
  // that never received a valid address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3334 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only the device compilation needs the host-side entry table.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a local module just to read its named metadata.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read integer/string operands of the metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operand layout mirrors
    // what createOffloadEntriesAndInfoMetadata() emits.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3403 
3404 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3405   if (!KmpRoutineEntryPtrTy) {
3406     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3407     ASTContext &C = CGM.getContext();
3408     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3409     FunctionProtoType::ExtProtoInfo EPI;
3410     KmpRoutineEntryPtrQTy = C.getPointerType(
3411         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3412     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3413   }
3414 }
3415 
3416 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3417   // Make sure the type of the entry is already created. This is the type we
3418   // have to create:
3419   // struct __tgt_offload_entry{
3420   //   void      *addr;       // Pointer to the offload entry info.
3421   //                          // (function or global)
3422   //   char      *name;       // Name of the function or global.
3423   //   size_t     size;       // Size of the entry info (0 if it a function).
3424   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3425   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3426   // };
3427   if (TgtOffloadEntryQTy.isNull()) {
3428     ASTContext &C = CGM.getContext();
3429     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3430     RD->startDefinition();
3431     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3432     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3433     addFieldToRecordDecl(C, RD, C.getSizeType());
3434     addFieldToRecordDecl(
3435         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3436     addFieldToRecordDecl(
3437         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3438     RD->completeDefinition();
3439     RD->addAttr(PackedAttr::CreateImplicit(C));
3440     TgtOffloadEntryQTy = C.getRecordType(RD);
3441   }
3442   return TgtOffloadEntryQTy;
3443 }
3444 
3445 namespace {
3446 struct PrivateHelpersTy {
3447   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3448                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3449       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3450         PrivateElemInit(PrivateElemInit) {}
3451   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3452   const Expr *OriginalRef = nullptr;
3453   const VarDecl *Original = nullptr;
3454   const VarDecl *PrivateCopy = nullptr;
3455   const VarDecl *PrivateElemInit = nullptr;
3456   bool isLocalPrivate() const {
3457     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3458   }
3459 };
3460 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3461 } // anonymous namespace
3462 
3463 static bool isAllocatableDecl(const VarDecl *VD) {
3464   const VarDecl *CVD = VD->getCanonicalDecl();
3465   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3466     return false;
3467   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3468   // Use the default allocation.
3469   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3470            !AA->getAllocator());
3471 }
3472 
3473 static RecordDecl *
3474 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3475   if (!Privates.empty()) {
3476     ASTContext &C = CGM.getContext();
3477     // Build struct .kmp_privates_t. {
3478     //         /*  private vars  */
3479     //       };
3480     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3481     RD->startDefinition();
3482     for (const auto &Pair : Privates) {
3483       const VarDecl *VD = Pair.second.Original;
3484       QualType Type = VD->getType().getNonReferenceType();
3485       // If the private variable is a local variable with lvalue ref type,
3486       // allocate the pointer instead of the pointee type.
3487       if (Pair.second.isLocalPrivate()) {
3488         if (VD->getType()->isLValueReferenceType())
3489           Type = C.getPointerType(Type);
3490         if (isAllocatableDecl(VD))
3491           Type = C.getPointerType(Type);
3492       }
3493       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3494       if (VD->hasAttrs()) {
3495         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3496              E(VD->getAttrs().end());
3497              I != E; ++I)
3498           FD->addAttr(*I);
3499       }
3500     }
3501     RD->completeDefinition();
3502     return RD;
3503   }
3504   return nullptr;
3505 }
3506 
3507 static RecordDecl *
3508 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3509                          QualType KmpInt32Ty,
3510                          QualType KmpRoutineEntryPointerQTy) {
3511   ASTContext &C = CGM.getContext();
3512   // Build struct kmp_task_t {
3513   //         void *              shareds;
3514   //         kmp_routine_entry_t routine;
3515   //         kmp_int32           part_id;
3516   //         kmp_cmplrdata_t data1;
3517   //         kmp_cmplrdata_t data2;
3518   // For taskloops additional fields:
3519   //         kmp_uint64          lb;
3520   //         kmp_uint64          ub;
3521   //         kmp_int64           st;
3522   //         kmp_int32           liter;
3523   //         void *              reductions;
3524   //       };
3525   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3526   UD->startDefinition();
3527   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3528   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3529   UD->completeDefinition();
3530   QualType KmpCmplrdataTy = C.getRecordType(UD);
3531   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3532   RD->startDefinition();
3533   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3534   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3535   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3536   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3537   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3538   if (isOpenMPTaskLoopDirective(Kind)) {
3539     QualType KmpUInt64Ty =
3540         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3541     QualType KmpInt64Ty =
3542         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3543     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3544     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3545     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3546     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3547     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3548   }
3549   RD->completeDefinition();
3550   return RD;
3551 }
3552 
3553 static RecordDecl *
3554 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3555                                      ArrayRef<PrivateDataTy> Privates) {
3556   ASTContext &C = CGM.getContext();
3557   // Build struct kmp_task_t_with_privates {
3558   //         kmp_task_t task_data;
3559   //         .kmp_privates_t. privates;
3560   //       };
3561   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3562   RD->startDefinition();
3563   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3564   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3565     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3566   RD->completeDefinition();
3567   return RD;
3568 }
3569 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt)
  // signature required by the task-entry convention.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // Internal-linkage helper; the runtime only sees its address.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  // Load the incoming gtid and the task descriptor.
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 0 of the wrapper record is the kmp_task_t task data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type the outlined task
  // function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass the address of the privates field if the wrapper record has one,
  // otherwise a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; taskloop bounds are appended
  // below when needed.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Load lb, ub, st, liter and reductions from the task data and forward
    // them by value.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3684 
/// Emit a function that destroys the fields of the privates record embedded
/// in a kmp_task_t_with_privates object. It has the same
/// (kmp_int32 gtid, kmp_task_t_with_privates *tt) signature as the task
/// entry.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: global thread id and the task descriptor (restrict-qualified).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  // Internal-linkage helper; only referenced by address.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Load the task descriptor and step to its second field: the privates
  // record (field 0 is the kmp_task_t task data).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every private field whose type needs
  // non-trivial destruction; trivially-destructible fields are skipped.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3733 
3734 /// Emit a privates mapping function for correct handling of private and
3735 /// firstprivate variables.
3736 /// \code
3737 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3738 /// **noalias priv1,...,  <tyn> **noalias privn) {
3739 ///   *priv1 = &.privates.priv1;
3740 ///   ...;
3741 ///   *privn = &.privates.privn;
3742 /// }
3743 /// \endcode
3744 static llvm::Value *
3745 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3746                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3747                                ArrayRef<PrivateDataTy> Privates) {
3748   ASTContext &C = CGM.getContext();
3749   FunctionArgList Args;
3750   ImplicitParamDecl TaskPrivatesArg(
3751       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3752       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3753       ImplicitParamDecl::Other);
3754   Args.push_back(&TaskPrivatesArg);
3755   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3756   unsigned Counter = 1;
3757   for (const Expr *E : Data.PrivateVars) {
3758     Args.push_back(ImplicitParamDecl::Create(
3759         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3760         C.getPointerType(C.getPointerType(E->getType()))
3761             .withConst()
3762             .withRestrict(),
3763         ImplicitParamDecl::Other));
3764     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3765     PrivateVarsPos[VD] = Counter;
3766     ++Counter;
3767   }
3768   for (const Expr *E : Data.FirstprivateVars) {
3769     Args.push_back(ImplicitParamDecl::Create(
3770         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3771         C.getPointerType(C.getPointerType(E->getType()))
3772             .withConst()
3773             .withRestrict(),
3774         ImplicitParamDecl::Other));
3775     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3776     PrivateVarsPos[VD] = Counter;
3777     ++Counter;
3778   }
3779   for (const Expr *E : Data.LastprivateVars) {
3780     Args.push_back(ImplicitParamDecl::Create(
3781         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3782         C.getPointerType(C.getPointerType(E->getType()))
3783             .withConst()
3784             .withRestrict(),
3785         ImplicitParamDecl::Other));
3786     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3787     PrivateVarsPos[VD] = Counter;
3788     ++Counter;
3789   }
3790   for (const VarDecl *VD : Data.PrivateLocals) {
3791     QualType Ty = VD->getType().getNonReferenceType();
3792     if (VD->getType()->isLValueReferenceType())
3793       Ty = C.getPointerType(Ty);
3794     if (isAllocatableDecl(VD))
3795       Ty = C.getPointerType(Ty);
3796     Args.push_back(ImplicitParamDecl::Create(
3797         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3798         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3799         ImplicitParamDecl::Other));
3800     PrivateVarsPos[VD] = Counter;
3801     ++Counter;
3802   }
3803   const auto &TaskPrivatesMapFnInfo =
3804       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3805   llvm::FunctionType *TaskPrivatesMapTy =
3806       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3807   std::string Name =
3808       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3809   auto *TaskPrivatesMap = llvm::Function::Create(
3810       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3811       &CGM.getModule());
3812   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3813                                     TaskPrivatesMapFnInfo);
3814   if (CGM.getLangOpts().Optimize) {
3815     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3816     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3817     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3818   }
3819   CodeGenFunction CGF(CGM);
3820   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3821                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3822 
3823   // *privi = &.privates.privi;
3824   LValue Base = CGF.EmitLoadOfPointerLValue(
3825       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3826       TaskPrivatesArg.getType()->castAs<PointerType>());
3827   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3828   Counter = 0;
3829   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3830     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3831     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3832     LValue RefLVal =
3833         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3834     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3835         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3836     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3837     ++Counter;
3838   }
3839   CGF.FinishFunction();
3840   return TaskPrivatesMap;
3841 }
3842 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block copied values come
///        from (may be invalid when not needed).
/// \param TDBase LValue of the kmp_task_t_with_privates object being filled.
/// \param ForDup true when emitting the body of the task duplication
///        function, false for the initial task construction.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function only non-trivial constructor calls need to
    // be re-run; everything else was handled at task creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the original (shared) value.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value out of the original task's shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: run the initializer with the source
          // element privatized so Init sees the shared value.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: default-initialize the copy.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3964 
3965 /// Check if duplication function is required for taskloops.
3966 static bool checkInitIsRequired(CodeGenFunction &CGF,
3967                                 ArrayRef<PrivateDataTy> Privates) {
3968   bool InitRequired = false;
3969   for (const PrivateDataTy &Pair : Privates) {
3970     if (Pair.second.isLocalPrivate())
3971       continue;
3972     const VarDecl *VD = Pair.second.PrivateCopy;
3973     const Expr *Init = VD->getAnyInitializer();
3974     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3975                                     !CGF.isTrivialInitializer(Init));
3976     if (InitRequired)
3977       break;
3978   }
3979   return InitRequired;
3980 }
3981 
3982 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the (task_dst, task_src, lastpriv) parameter list.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  // Internal-linkage helper; only referenced by address.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Load the destination task descriptor.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivate values are copied out of the source task's shareds block;
    // load its address here.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4061 
4062 /// Checks if destructor function is required to be generated.
4063 /// \return true if cleanups are required, false otherwise.
4064 static bool
4065 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4066                          ArrayRef<PrivateDataTy> Privates) {
4067   for (const PrivateDataTy &P : Privates) {
4068     if (P.second.isLocalPrivate())
4069       continue;
4070     QualType Ty = P.second.Original->getType().getNonReferenceType();
4071     if (Ty.isDestructedType())
4072       return true;
4073   }
4074   return false;
4075 }
4076 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator/counter variables and
/// emits the *opening* part of one counting loop per iterator (counter init,
/// continuation label, bounds check, branch to body, iterator update); the
/// destructor emits the *closing* part of each loop (counter increment, branch
/// back to the continuation label, exit label) in reverse nesting order. Code
/// emitted by the caller between construction and destruction therefore ends
/// up inside the innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression driving the generated loops; null means "no-op scope".
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation (loop-latch) destinations, index-aligned with
  // the iterators of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  // Per-iterator exit destinations, index-aligned with the iterators of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Upper bounds are emitted up front, before privatization, so they are
    // evaluated in terms of the original (non-private) variables.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the head of one loop per iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4155 
4156 static std::pair<llvm::Value *, llvm::Value *>
4157 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4158   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4159   llvm::Value *Addr;
4160   if (OASE) {
4161     const Expr *Base = OASE->getBase();
4162     Addr = CGF.EmitScalarExpr(Base);
4163   } else {
4164     Addr = CGF.EmitLValue(E).getPointer(CGF);
4165   }
4166   llvm::Value *SizeVal;
4167   QualType Ty = E->getType();
4168   if (OASE) {
4169     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4170     for (const Expr *SE : OASE->getDimensions()) {
4171       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4172       Sz = CGF.EmitScalarConversion(
4173           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4174       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4175     }
4176   } else if (const auto *ASE =
4177                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4178     LValue UpAddrLVal =
4179         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4180     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4181     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4182         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4183     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4184     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4185     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4186   } else {
4187     SizeVal = CGF.getTypeSize(Ty);
4188   }
4189   return std::make_pair(Addr, SizeVal);
4190 }
4191 
4192 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4193 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4194   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4195   if (KmpTaskAffinityInfoTy.isNull()) {
4196     RecordDecl *KmpAffinityInfoRD =
4197         C.buildImplicitRecord("kmp_task_affinity_info_t");
4198     KmpAffinityInfoRD->startDefinition();
4199     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4200     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4201     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4202     KmpAffinityInfoRD->completeDefinition();
4203     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4204   }
4205 }
4206 
/// Emits the task-creation sequence for task-based directives (task,
/// taskloop, target tasks): aggregates and alignment-sorts the private
/// variables, builds the kmp_task_t-with-privates record and the proxy task
/// entry, calls the runtime task-allocation entry point, processes detach and
/// affinity clauses, copies shareds, initializes private copies, and fills in
/// destructor and priority information. Returns the pieces callers need to
/// actually enqueue the task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the init expression used to copy the
  // original value into the private copy.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment to minimize padding in the privates record;
  // stable to keep a deterministic order among equally-aligned entries.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // distinct (extended) record from plain task/target directives, so each is
  // cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; its type is taken from there to build a matching map function
  // (or a matching null when there are no privates).
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (final clause with a non-constant
  // condition, selected at runtime) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Store the event handle into the user's event variable, converted to the
    // variable's declared type.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime-computed count
    // (product of iterator trip counts); plain clauses a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Total is runtime-known: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Total is compile-time-known: emit a constant-sized local array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified clauses need a runtime position counter, seeded past
    // the statically filled entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-chunk re-initialization also get a task-dup
    // helper function.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4594 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): the numeric values appear to mirror the flag encoding the
/// OpenMP runtime expects in kmp_depend_info.flags — keep them in sync with
/// the runtime when changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
/// Order matches the fields created by getDependTypes():
/// base_addr (intptr_t), len (size_t), flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4606 
4607 /// Translates internal dependency kind into the runtime kind.
4608 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4609   RTLDependenceKindTy DepKind;
4610   switch (K) {
4611   case OMPC_DEPEND_in:
4612     DepKind = DepIn;
4613     break;
4614   // Out and InOut dependencies must use the same code.
4615   case OMPC_DEPEND_out:
4616   case OMPC_DEPEND_inout:
4617     DepKind = DepInOut;
4618     break;
4619   case OMPC_DEPEND_mutexinoutset:
4620     DepKind = DepMutexInOutSet;
4621     break;
4622   case OMPC_DEPEND_inoutset:
4623     DepKind = DepInOutSet;
4624     break;
4625   case OMPC_DEPEND_source:
4626   case OMPC_DEPEND_sink:
4627   case OMPC_DEPEND_depobj:
4628   case OMPC_DEPEND_unknown:
4629     llvm_unreachable("Unknown task dependence type");
4630   }
4631   return DepKind;
4632 }
4633 
4634 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4635 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4636                            QualType &FlagsTy) {
4637   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4638   if (KmpDependInfoTy.isNull()) {
4639     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4640     KmpDependInfoRD->startDefinition();
4641     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4642     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4643     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4644     KmpDependInfoRD->completeDefinition();
4645     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4646   }
4647 }
4648 
/// Decodes a depobj variable: returns the number of dependence entries it
/// holds and an lvalue for the first kmp_depend_info element.
/// The element count is read from the base_addr field of the record stored
/// one element *before* the array (index -1).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable stores a void* pointing at the dependence array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret that pointer as kmp_depend_info*.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the header record holding the count.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4675 
/// Fills kmp_depend_info entries in \p DependenciesArray for every expression
/// in \p Data.DepExprs, inside an (optional) iterator loop nest.
/// \param Pos either a compile-time index (unsigned*, advanced in place) or a
///        runtime index lvalue (LValue*, loaded/incremented/stored per entry)
///        when the count is only known at runtime (e.g. iterator modifiers).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the depend clause has an iterator modifier, wrap the stores below in
  // the generated iterator loop nest (no-op scope when there is no iterator).
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time index: address the slot with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load the current position from the counter lvalue.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position — in place for the compile-time counter, via
    // load/add/store for the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4732 
/// Emits, for every depobj dependency in \p Data, the number of
/// kmp_depend_info records stored in that depobj. The count is read from the
/// base_addr field of the element at index -1 of the depobj array (where
/// emitDepobjDependClause recorded it).
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the void* stored in the depobj variable and cast it to
      // kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives one record before the first real dependency.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Stash the count in a stack temporary so it can still be read after
      // the iterator scope (and its loop-local values) is closed below.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the stashed counts outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4788 
/// Copies the dependency records stored in every depobj listed in \p Data
/// into \p DependenciesArray at the runtime position held in \p PosLVal,
/// advancing the position by the number of copied records.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the void* stored in the depobj variable and cast it to
      // kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj (stored in the record at
      // index -1, see emitDepobjDependClause).
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data (byte size = record size * record count).
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos by the number of copied records.
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4845 
/// Emits the kmp_depend_info array covering all task dependencies in
/// \p Dependencies and returns {number of elements, array address}. When
/// every dependency count is known at compile time a constant-size local
/// array is used; if any depobj or iterator-driven dependency is present,
/// the total element count is computed at run time and a variable-length
/// array is emitted instead.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Count regular dependencies whose number is known statically (depobj and
  // iterator-driven clauses are accounted for separately below).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Each depobj contributes the element count stored inside it.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Iterator-driven clause: elements = (product of iterator trip counts)
      // * (number of dependency expressions in the clause).
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at run time - emit a VLA sized by the sum of
    // static, depobj and iterator-driven counts.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the computed count in an OpaqueValueExpr so it can serve as the
    // size expression of a variable array type.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // All counts known statically - use a fixed-size local array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Copy regular dependencies with statically known positions first.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  // Hand the array to the runtime as a void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4969 
/// Emits the allocation and initialization of the dependency array backing a
/// 'depobj' object. The array is allocated dynamically via __kmpc_alloc with
/// one extra leading record that stores the number of dependencies (needed
/// by the 'depobj(x) update(in)' and destroy constructs); the returned
/// address points past that extra record, at the first real entry.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator-driven: the element count is the product of the iterator
    // trip counts, computed at run time.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the extra leading record holding the count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Statically known count: size the (count + 1)-element array directly.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill in the records starting at index 1; iterator-driven clauses track
  // the position in a runtime counter, otherwise a constant index suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a void* pointing at the first real record (skip the count slot).
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5052 
5053 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5054                                         SourceLocation Loc) {
5055   ASTContext &C = CGM.getContext();
5056   QualType FlagsTy;
5057   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5058   LValue Base = CGF.EmitLoadOfPointerLValue(
5059       DepobjLVal.getAddress(CGF),
5060       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5061   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5062   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5063       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5064   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5065       Addr.getElementType(), Addr.getPointer(),
5066       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5067   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5068                                                                CGF.VoidPtrTy);
5069   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5070   // Use default allocator.
5071   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5072   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5073 
5074   // _kmpc_free(gtid, addr, nullptr);
5075   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5076                                 CGM.getModule(), OMPRTL___kmpc_free),
5077                             Args);
5078 }
5079 
/// Emits code for 'depobj(x) update(NewDepKind)': loops over every
/// kmp_depend_info record of the depobj array and overwrites its flags field
/// with the new dependence kind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI node tracks the current element; the back-edge incoming value is
  // registered after the loop body is emitted below.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5126 
/// Emits code for a 'task' directive: the task object is created by
/// emitTaskInit and then either enqueued via __kmpc_omp_task /
/// __kmpc_omp_task_with_deps or, when the 'if' clause evaluates to false,
/// executed immediately and undeferred (__kmpc_omp_task_begin_if0, direct
/// call of the proxy entry, __kmpc_omp_task_complete_if0), waiting on any
/// dependences first via __kmpc_omp_wait_deps.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch: enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch (if-clause false): run the task immediately, undeferred.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5244 
/// Emits code for a 'taskloop' directive: initializes the task object via
/// emitTaskInit, stores the loop lower/upper bounds, stride and reduction
/// data into the task record, and issues the __kmpc_taskloop runtime call.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The 'if' clause is passed to the runtime as an int (unlike 'task', where
  // it selects between two code paths at compile time).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the loop lower bound into the task record.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the loop upper bound into the task record.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the loop stride into the task record.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5330 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Generates an element-by-element loop of the shape:
///   for (dest = LHSBegin, src = RHSBegin; dest != LHSEnd; ++dest, ++src)
///     RedOpGen(*dest, *src);
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr,EExpr,UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by the atomic-reduction codegen path below).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // loop iterations; the second incoming value is added after the latch below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation acts on a single pair of elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5413 
5414 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5415 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5416 /// UDR combiner function.
5417 static void emitReductionCombiner(CodeGenFunction &CGF,
5418                                   const Expr *ReductionOp) {
5419   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5420     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5421       if (const auto *DRE =
5422               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5423         if (const auto *DRD =
5424                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5425           std::pair<llvm::Function *, llvm::Function *> Reduction =
5426               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5427           RValue Func = RValue::get(Reduction.first);
5428           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5429           CGF.EmitIgnoredExpr(ReductionOp);
5430           return;
5431         }
5432   CGF.EmitIgnoredExpr(ReductionOp);
5433 }
5434 
/// Emits the interprocedural reduction function
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of pointers to the reduction items; each
/// pair of items is combined with the matching entry of \p ReductionOps and
/// the result is stored back through the LHS pointer.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap every LHS/RHS variable to the corresponding slot of the argument
  // arrays so the reduction expressions below act on the passed-in items.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // A variably-modified item occupies an extra slot (its size) right after
      // the item itself, so the slot index advances by one additional step.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5526 
5527 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5528                                                   const Expr *ReductionOp,
5529                                                   const Expr *PrivateRef,
5530                                                   const DeclRefExpr *LHS,
5531                                                   const DeclRefExpr *RHS) {
5532   if (PrivateRef->getType()->isArrayType()) {
5533     // Emit reduction for array section.
5534     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5535     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5536     EmitOMPAggregateReduction(
5537         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5538         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5539           emitReductionCombiner(CGF, ReductionOp);
5540         });
5541   } else {
5542     // Emit reduction for array subscript or single variable.
5543     emitReductionCombiner(CGF, ReductionOp);
5544   }
5545 }
5546 
/// Emits the runtime sequence for an OpenMP 'reduction' clause: the list of
/// reduction items, the reduce_func combiner, the critical-name lock, and a
/// call to __kmpc_reduce{_nowait} whose result selects between direct
/// combiner application (case 1) and atomic application (case 2).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to emit if there is no valid insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: apply each combiner inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // VLA items use the next slot to pass the element count to reduce_func.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/EExpr/UpExpr describe a recognized 'x = x op e' update, if any.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO_Comma means "no atomic opcode recognized yet"; overwritten below.
      BinaryOperatorKind BO = BO_Comma;
      // (The inner 'BO' here intentionally shadows the opcode variable above.)
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: materialize the loaded value in a temporary
                // mapped to VD, then evaluate the full update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5853 
5854 /// Generates unique name for artificial threadprivate variables.
5855 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5856 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5857                                       const Expr *Ref) {
5858   SmallString<256> Buffer;
5859   llvm::raw_svector_ostream Out(Buffer);
5860   const clang::DeclRefExpr *DE;
5861   const VarDecl *D = ::getBaseDecl(Ref, DE);
5862   if (!D)
5863     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5864   D = D->getCanonicalDecl();
5865   std::string Name = CGM.getOpenMPRuntime().getName(
5866       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5867   Out << Prefix << Name << "_"
5868       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5869   return std::string(Out.str());
5870 }
5871 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5935 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item this combiner is emitted for.
/// \param ReductionOp Combiner expression to emit.
/// \param LHS,RHS Placeholder variables referenced by \p ReductionOp.
/// \param PrivateRef Private copy expression (used to detect array items).
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6013 
6014 /// Emits reduction finalizer function:
6015 /// \code
6016 /// void @.red_fini(void* %arg) {
6017 /// %0 = bitcast void* %arg to <type>*
6018 /// <destroy>(<type>* %0)
6019 /// ret void
6020 /// }
6021 /// \endcode
6022 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6023                                            SourceLocation Loc,
6024                                            ReductionCodeGen &RCG, unsigned N) {
6025   if (!RCG.needCleanups(N))
6026     return nullptr;
6027   ASTContext &C = CGM.getContext();
6028   FunctionArgList Args;
6029   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6030                           ImplicitParamDecl::Other);
6031   Args.emplace_back(&Param);
6032   const auto &FnInfo =
6033       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6034   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6035   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6036   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6037                                     Name, &CGM.getModule());
6038   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6039   Fn->setDoesNotRecurse();
6040   CodeGenFunction CGF(CGM);
6041   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6042   Address PrivateAddr = CGF.EmitLoadOfPointer(
6043       CGF.GetAddrOfLocalVar(&Param),
6044       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6045   llvm::Value *Size = nullptr;
6046   // If the size of the reduction item is non-constant, load it from global
6047   // threadprivate variable.
6048   if (RCG.getSizes(N).second) {
6049     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6050         CGF, CGM.getContext().getSizeType(),
6051         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6052     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6053                                 CGM.getContext().getSizeType(), Loc);
6054   }
6055   RCG.emitAggregateType(CGF, N, Size);
6056   // Emit the finalizer body:
6057   // <destroy>(<type>* %0)
6058   RCG.emitCleanups(CGF, N, PrivateAddr);
6059   CGF.FinishFunction(Loc);
6060   return Fn;
6061 }
6062 
/// Emits initialization of task reductions: builds an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
/// shared/original addresses, the item size, the init/fini/comb helper
/// functions and a flags word) and passes it to __kmpc_taskred_modifier_init
/// when Data.IsReductionWithTaskMod is set, or to __kmpc_taskred_init
/// otherwise; returns the descriptor produced by the runtime call.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null when the item needs no cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 marks the item for delayed creation (its size is only
      // known at runtime, see the comment on DelayedCreation above).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6191 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6209 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // If the size of the reduction item is non-constant (Sizes.second is
  // non-null), stash it in an artificial threadprivate variable so the
  // init/combiner/finalizer helper functions, which cannot take the size as a
  // parameter, can reload it under the same unique "reduction_size" name.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6226 
6227 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6228                                               SourceLocation Loc,
6229                                               llvm::Value *ReductionsPtr,
6230                                               LValue SharedLVal) {
6231   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6232   // *d);
6233   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6234                                                    CGM.IntTy,
6235                                                    /*isSigned=*/true),
6236                          ReductionsPtr,
6237                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6238                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6239   return Address(
6240       CGF.EmitRuntimeCall(
6241           OMPBuilder.getOrCreateRuntimeFunction(
6242               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6243           Args),
6244       SharedLVal.getAlignment());
6245 }
6246 
/// Emits a 'taskwait': through the OpenMPIRBuilder when it is enabled and
/// there are no depend clauses, otherwise as a call to __kmpc_omp_wait_deps
/// (with dependences) or __kmpc_omp_taskwait.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); // ndeps_noalias
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); // noalias_dep_list

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Let the enclosing OpenMP region emit its untied task switch point, if any.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6297 
6298 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6299                                            OpenMPDirectiveKind InnerKind,
6300                                            const RegionCodeGenTy &CodeGen,
6301                                            bool HasCancel) {
6302   if (!CGF.HaveInsertPoint())
6303     return;
6304   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6305                                  InnerKind != OMPD_critical &&
6306                                      InnerKind != OMPD_master &&
6307                                      InnerKind != OMPD_masked);
6308   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6309 }
6310 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel a 'parallel' region.
  CancelLoop = 2,      // Cancel a worksharing loop ('for').
  CancelSections = 3,  // Cancel a 'sections' region.
  CancelTaskgroup = 4  // Cancel a 'taskgroup' region.
};
} // anonymous namespace
6320 
6321 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6322   RTCancelKind CancelKind = CancelNoreq;
6323   if (CancelRegion == OMPD_parallel)
6324     CancelKind = CancelParallel;
6325   else if (CancelRegion == OMPD_for)
6326     CancelKind = CancelLoop;
6327   else if (CancelRegion == OMPD_sections)
6328     CancelKind = CancelSections;
6329   else {
6330     assert(CancelRegion == OMPD_taskgroup);
6331     CancelKind = CancelTaskgroup;
6332   }
6333   return CancelKind;
6334 }
6335 
/// Emits code for the 'cancellation point' directive: calls
/// __kmpc_cancellationpoint and, when it reports an active cancellation,
/// branches out of the construct through the cleanup path (preceded by a
/// cancellation barrier for 'parallel' regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6375 
/// Emits code for the 'cancel' directive: calls __kmpc_cancel (guarded by the
/// 'if' clause condition when one is present) and, when cancellation is
/// activated, branches out of the construct (with a cancellation barrier for
/// 'parallel' regions).
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    // With an 'if' clause, only the then-branch performs the cancellation.
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6421 
namespace {
/// Cleanup action for uses_allocators support: initializes each recorded
/// allocator on entry to the region and destroys it on exit.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expression, allocator-traits expression) pairs to process.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    // Emit an allocator initialization for each (allocator, traits) pair.
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    // Emit the matching allocator destruction calls.
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6449 
6450 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6451     const OMPExecutableDirective &D, StringRef ParentName,
6452     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6453     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6454   assert(!ParentName.empty() && "Invalid target region parent name!");
6455   HasEmittedTargetRegion = true;
6456   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6457   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6458     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6459       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6460       if (!D.AllocatorTraits)
6461         continue;
6462       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6463     }
6464   }
6465   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6466   CodeGen.setAction(UsesAllocatorAction);
6467   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6468                                    IsOffloadEntry, CodeGen);
6469 }
6470 
/// Initializes an allocator named in a 'uses_allocators' clause: calls
/// __kmpc_init_allocator(gtid, memspace, ntraits, traits) and stores the
/// returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the extent of the constant traits array type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable's declaration first, then convert the handle
  // from void* to the allocator's declared type and store it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6505 
6506 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6507                                              const Expr *Allocator) {
6508   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6509   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6510   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6511   llvm::Value *AllocatorVal =
6512       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6513   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6514                                           CGF.getContext().VoidPtrTy,
6515                                           Allocator->getExprLoc());
6516   (void)CGF.EmitRuntimeCall(
6517       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6518                                             OMPRTL___kmpc_destroy_allocator),
6519       {ThreadId, AllocatorVal});
6520 }
6521 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target statement into EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a uniquely named constant global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6606 
6607 /// Checks if the expression is constant or does not have non-trivial function
6608 /// calls.
6609 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6610   // We can skip constant expressions.
6611   // We can skip expressions with trivial calls or simple expressions.
6612   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6613           !E->hasNonTrivialCall(Ctx)) &&
6614          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6615 }
6616 
/// Looks through compound statements to find the single significant child
/// statement of \p Body, skipping trivial expressions, asm/null statements,
/// flush/barrier/taskyield directives and inert declarations; returns nullptr
/// when more than one significant child remains.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              // Declarations of globals or unused variables can be skipped.
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the found child in case it is itself a compound statement.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6658 
6659 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6660     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6661     int32_t &DefaultVal) {
6662 
6663   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6664   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6665          "Expected target-based executable directive.");
6666   switch (DirectiveKind) {
6667   case OMPD_target: {
6668     const auto *CS = D.getInnermostCapturedStmt();
6669     const auto *Body =
6670         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6671     const Stmt *ChildStmt =
6672         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6673     if (const auto *NestedDir =
6674             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6675       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6676         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6677           const Expr *NumTeams =
6678               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6679           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6680             if (auto Constant =
6681                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6682               DefaultVal = Constant->getExtValue();
6683           return NumTeams;
6684         }
6685         DefaultVal = 0;
6686         return nullptr;
6687       }
6688       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6689           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6690         DefaultVal = 1;
6691         return nullptr;
6692       }
6693       DefaultVal = 1;
6694       return nullptr;
6695     }
6696     // A value of -1 is used to check if we need to emit no teams region
6697     DefaultVal = -1;
6698     return nullptr;
6699   }
6700   case OMPD_target_teams:
6701   case OMPD_target_teams_distribute:
6702   case OMPD_target_teams_distribute_simd:
6703   case OMPD_target_teams_distribute_parallel_for:
6704   case OMPD_target_teams_distribute_parallel_for_simd: {
6705     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6706       const Expr *NumTeams =
6707           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6708       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6709         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6710           DefaultVal = Constant->getExtValue();
6711       return NumTeams;
6712     }
6713     DefaultVal = 0;
6714     return nullptr;
6715   }
6716   case OMPD_target_parallel:
6717   case OMPD_target_parallel_for:
6718   case OMPD_target_parallel_for_simd:
6719   case OMPD_target_simd:
6720     DefaultVal = 1;
6721     return nullptr;
6722   case OMPD_parallel:
6723   case OMPD_for:
6724   case OMPD_parallel_for:
6725   case OMPD_parallel_master:
6726   case OMPD_parallel_sections:
6727   case OMPD_for_simd:
6728   case OMPD_parallel_for_simd:
6729   case OMPD_cancel:
6730   case OMPD_cancellation_point:
6731   case OMPD_ordered:
6732   case OMPD_threadprivate:
6733   case OMPD_allocate:
6734   case OMPD_task:
6735   case OMPD_simd:
6736   case OMPD_tile:
6737   case OMPD_unroll:
6738   case OMPD_sections:
6739   case OMPD_section:
6740   case OMPD_single:
6741   case OMPD_master:
6742   case OMPD_critical:
6743   case OMPD_taskyield:
6744   case OMPD_barrier:
6745   case OMPD_taskwait:
6746   case OMPD_taskgroup:
6747   case OMPD_atomic:
6748   case OMPD_flush:
6749   case OMPD_depobj:
6750   case OMPD_scan:
6751   case OMPD_teams:
6752   case OMPD_target_data:
6753   case OMPD_target_exit_data:
6754   case OMPD_target_enter_data:
6755   case OMPD_distribute:
6756   case OMPD_distribute_simd:
6757   case OMPD_distribute_parallel_for:
6758   case OMPD_distribute_parallel_for_simd:
6759   case OMPD_teams_distribute:
6760   case OMPD_teams_distribute_simd:
6761   case OMPD_teams_distribute_parallel_for:
6762   case OMPD_teams_distribute_parallel_for_simd:
6763   case OMPD_target_update:
6764   case OMPD_declare_simd:
6765   case OMPD_declare_variant:
6766   case OMPD_begin_declare_variant:
6767   case OMPD_end_declare_variant:
6768   case OMPD_declare_target:
6769   case OMPD_end_declare_target:
6770   case OMPD_declare_reduction:
6771   case OMPD_declare_mapper:
6772   case OMPD_taskloop:
6773   case OMPD_taskloop_simd:
6774   case OMPD_master_taskloop:
6775   case OMPD_master_taskloop_simd:
6776   case OMPD_parallel_master_taskloop:
6777   case OMPD_parallel_master_taskloop_simd:
6778   case OMPD_requires:
6779   case OMPD_metadirective:
6780   case OMPD_unknown:
6781     break;
6782   default:
6783     break;
6784   }
6785   llvm_unreachable("Unexpected directive kind.");
6786 }
6787 
6788 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6789     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6790   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6791          "Clauses associated with the teams directive expected to be emitted "
6792          "only for the host!");
6793   CGBuilderTy &Bld = CGF.Builder;
6794   int32_t DefaultNT = -1;
6795   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6796   if (NumTeams != nullptr) {
6797     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6798 
6799     switch (DirectiveKind) {
6800     case OMPD_target: {
6801       const auto *CS = D.getInnermostCapturedStmt();
6802       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6803       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6804       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6805                                                   /*IgnoreResultAssign*/ true);
6806       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6807                              /*isSigned=*/true);
6808     }
6809     case OMPD_target_teams:
6810     case OMPD_target_teams_distribute:
6811     case OMPD_target_teams_distribute_simd:
6812     case OMPD_target_teams_distribute_parallel_for:
6813     case OMPD_target_teams_distribute_parallel_for_simd: {
6814       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6815       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6816                                                   /*IgnoreResultAssign*/ true);
6817       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6818                              /*isSigned=*/true);
6819     }
6820     default:
6821       break;
6822     }
6823   } else if (DefaultNT == -1) {
6824     return nullptr;
6825   }
6826 
6827   return Bld.getInt32(DefaultNT);
6828 }
6829 
/// Computes the number of threads implied by a 'parallel' (or 'simd') region
/// found as the single meaningful child of the captured statement \p CS.
/// Returns 1 for simd regions, combines nested if/num_threads clauses with
/// \p DefaultThreadLimitVal (an already-computed thread_limit value, may be
/// null) for parallel regions, and falls back to \p DefaultThreadLimitVal
/// (or 0, meaning "let the runtime decide") otherwise.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Find the if clause that applies to 'parallel' (no modifier, or the
        // 'parallel' directive-name-modifier).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // The condition folds to false: the region runs single-threaded.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit helper variables the condition was pre-initialized with.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit helper variables the num_threads expression was
        // pre-initialized with.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the enclosing thread limit:
        // min(DefaultThreadLimitVal, NumThreads) (unsigned compare).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: use the thread limit if present, otherwise 0
        // (i.e. let the runtime choose).
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region is executed by a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No single nested directive found: fall back to the thread limit, or 0 to
  // let the runtime decide.
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6921 
6922 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6923     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6924     int32_t &DefaultVal) {
6925   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6926   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6927          "Expected target-based executable directive.");
6928 
6929   switch (DirectiveKind) {
6930   case OMPD_target:
6931     // Teams have no clause thread_limit
6932     return nullptr;
6933   case OMPD_target_teams:
6934   case OMPD_target_teams_distribute:
6935     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6936       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6937       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6938       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6939         if (auto Constant =
6940                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6941           DefaultVal = Constant->getExtValue();
6942       return ThreadLimit;
6943     }
6944     return nullptr;
6945   case OMPD_target_parallel:
6946   case OMPD_target_parallel_for:
6947   case OMPD_target_parallel_for_simd:
6948   case OMPD_target_teams_distribute_parallel_for:
6949   case OMPD_target_teams_distribute_parallel_for_simd: {
6950     Expr *ThreadLimit = nullptr;
6951     Expr *NumThreads = nullptr;
6952     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6953       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6954       ThreadLimit = ThreadLimitClause->getThreadLimit();
6955       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6956         if (auto Constant =
6957                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6958           DefaultVal = Constant->getExtValue();
6959     }
6960     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6961       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6962       NumThreads = NumThreadsClause->getNumThreads();
6963       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6964         if (auto Constant =
6965                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6966           if (Constant->getExtValue() < DefaultVal) {
6967             DefaultVal = Constant->getExtValue();
6968             ThreadLimit = NumThreads;
6969           }
6970         }
6971       }
6972     }
6973     return ThreadLimit;
6974   }
6975   case OMPD_target_teams_distribute_simd:
6976   case OMPD_target_simd:
6977     DefaultVal = 1;
6978     return nullptr;
6979   case OMPD_parallel:
6980   case OMPD_for:
6981   case OMPD_parallel_for:
6982   case OMPD_parallel_master:
6983   case OMPD_parallel_sections:
6984   case OMPD_for_simd:
6985   case OMPD_parallel_for_simd:
6986   case OMPD_cancel:
6987   case OMPD_cancellation_point:
6988   case OMPD_ordered:
6989   case OMPD_threadprivate:
6990   case OMPD_allocate:
6991   case OMPD_task:
6992   case OMPD_simd:
6993   case OMPD_tile:
6994   case OMPD_unroll:
6995   case OMPD_sections:
6996   case OMPD_section:
6997   case OMPD_single:
6998   case OMPD_master:
6999   case OMPD_critical:
7000   case OMPD_taskyield:
7001   case OMPD_barrier:
7002   case OMPD_taskwait:
7003   case OMPD_taskgroup:
7004   case OMPD_atomic:
7005   case OMPD_flush:
7006   case OMPD_depobj:
7007   case OMPD_scan:
7008   case OMPD_teams:
7009   case OMPD_target_data:
7010   case OMPD_target_exit_data:
7011   case OMPD_target_enter_data:
7012   case OMPD_distribute:
7013   case OMPD_distribute_simd:
7014   case OMPD_distribute_parallel_for:
7015   case OMPD_distribute_parallel_for_simd:
7016   case OMPD_teams_distribute:
7017   case OMPD_teams_distribute_simd:
7018   case OMPD_teams_distribute_parallel_for:
7019   case OMPD_teams_distribute_parallel_for_simd:
7020   case OMPD_target_update:
7021   case OMPD_declare_simd:
7022   case OMPD_declare_variant:
7023   case OMPD_begin_declare_variant:
7024   case OMPD_end_declare_variant:
7025   case OMPD_declare_target:
7026   case OMPD_end_declare_target:
7027   case OMPD_declare_reduction:
7028   case OMPD_declare_mapper:
7029   case OMPD_taskloop:
7030   case OMPD_taskloop_simd:
7031   case OMPD_master_taskloop:
7032   case OMPD_master_taskloop_simd:
7033   case OMPD_parallel_master_taskloop:
7034   case OMPD_parallel_master_taskloop_simd:
7035   case OMPD_requires:
7036   case OMPD_unknown:
7037     break;
7038   default:
7039     break;
7040   }
7041   llvm_unreachable("Unsupported directive kind.");
7042 }
7043 
/// Emits the host-side number of threads for the target-based directive \p D
/// as an i32 value, combining thread_limit/num_threads/if clauses on \p D and
/// on nested directives (via getNumThreads). A value of 0 means "let the
/// runtime decide".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A nested parallel region (if any) dictates the thread count directly.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Take a thread_limit clause on the nested directive into account.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit helper variables the thread_limit expression was
        // pre-initialized with.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend through a non-combined 'teams' directive to reach any nested
      // distribute/parallel region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // Look through a (non-simd) 'distribute' for a nested parallel region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd region runs single-threaded.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested 'distribute' for a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Find the if clause that applies to 'parallel' (no modifier, or the
      // 'parallel' directive-name-modifier).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Condition folds to false: the region runs single-threaded.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine thread_limit and num_threads as min(thread_limit, num_threads)
      // (unsigned compare).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions are executed by a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7262 
7263 namespace {
7264 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7265 
7266 // Utility to handle information from clauses associated with a given
7267 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7268 // It provides a convenient interface to obtain the information and generate
7269 // code for that information.
7270 class MappableExprsHandler {
7271 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values expected by the
  /// offloading runtime.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is intentionally unused here: it is reserved for compatibility
    /// with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured.  It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7327 
7328   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7329   static unsigned getFlagMemberOffset() {
7330     unsigned Offset = 0;
7331     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7332          Remain = Remain >> 1)
7333       Offset++;
7334     return Offset;
7335   }
7336 
7337   /// Class that holds debugging information for a data mapping to be passed to
7338   /// the runtime library.
7339   class MappingExprInfo {
7340     /// The variable declaration used for the data mapping.
7341     const ValueDecl *MapDecl = nullptr;
7342     /// The original expression used in the map clause, or null if there is
7343     /// none.
7344     const Expr *MapExpr = nullptr;
7345 
7346   public:
7347     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7348         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7349 
7350     const ValueDecl *getMapDecl() const { return MapDecl; }
7351     const Expr *getMapExpr() const { return MapExpr; }
7352   };
7353 
7354   /// Class that associates information with a base pointer to be passed to the
7355   /// runtime library.
7356   class BasePointerInfo {
7357     /// The base pointer.
7358     llvm::Value *Ptr = nullptr;
7359     /// The base declaration that refers to this device pointer, or null if
7360     /// there is none.
7361     const ValueDecl *DevPtrDecl = nullptr;
7362 
7363   public:
7364     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7365         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7366     llvm::Value *operator*() const { return Ptr; }
7367     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7368     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7369   };
7370 
  /// Array types used to collect the per-component map information that is
  /// handed to the runtime library.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7378 
7379   /// This structure contains combined information generated for mappable
7380   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7381   /// mappers, and non-contiguous information.
7382   struct MapCombinedInfoTy {
7383     struct StructNonContiguousInfo {
7384       bool IsNonContiguous = false;
7385       MapDimArrayTy Dims;
7386       MapNonContiguousArrayTy Offsets;
7387       MapNonContiguousArrayTy Counts;
7388       MapNonContiguousArrayTy Strides;
7389     };
7390     MapExprsArrayTy Exprs;
7391     MapBaseValuesArrayTy BasePointers;
7392     MapValuesArrayTy Pointers;
7393     MapValuesArrayTy Sizes;
7394     MapFlagsArrayTy Types;
7395     MapMappersArrayTy Mappers;
7396     StructNonContiguousInfo NonContigInfo;
7397 
7398     /// Append arrays in \a CurInfo.
7399     void append(MapCombinedInfoTy &CurInfo) {
7400       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7401       BasePointers.append(CurInfo.BasePointers.begin(),
7402                           CurInfo.BasePointers.end());
7403       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7404       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7405       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7406       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7407       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7408                                  CurInfo.NonContigInfo.Dims.end());
7409       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7410                                     CurInfo.NonContigInfo.Offsets.end());
7411       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7412                                    CurInfo.NonContigInfo.Counts.end());
7413       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7414                                     CurInfo.NonContigInfo.Strides.end());
7415     }
7416   };
7417 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Combined-info entries generated ahead of the struct entry itself.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: its field index and address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: its field index and address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself.
    Address Base = Address::invalid();
    // NOTE(review): presumably the lower-bound address of the mapped region —
    // confirm against the code that fills this struct in.
    Address LB = Address::invalid();
    /// True if the mapped range involves an array section.
    bool IsArraySection = false;
    /// True when the whole record is mapped rather than a partial range.
    bool HasCompleteRecord = false;
  };
7433 
7434 private:
  /// Everything recorded for a single component list of a map/to/from clause
  /// (or an implicit map): the components themselves plus the clause's type,
  /// modifiers, and related flags.
  struct MapInfo {
    /// The expression components the entry was built from.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// The map type (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (e.g. always, close, present, ompx_hold).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from to/from clauses (e.g. present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the map was implicit rather than user-written.
    bool IsImplicit = false;
    /// User-defined mapper attached to the clause, if any.
    const ValueDecl *Mapper = nullptr;
    /// Variable-reference expression, used when reporting on the entry.
    const Expr *VarRef = nullptr;
    /// True when the entry stems from use_device_addr (vs. use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7461 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The expression of the deferred entry.
    const Expr *IE = nullptr;
    /// The declaration the entry refers to.
    const ValueDecl *VD = nullptr;
    /// True when the entry stems from use_device_addr (vs. use_device_ptr).
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7474 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause that covers them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7498 
7499   llvm::Value *getExprTypeSize(const Expr *E) const {
7500     QualType ExprTy = E->getType().getCanonicalType();
7501 
7502     // Calculate the size for array shaping expression.
7503     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7504       llvm::Value *Size =
7505           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7506       for (const Expr *SE : OAE->getDimensions()) {
7507         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7508         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7509                                       CGF.getContext().getSizeType(),
7510                                       SE->getExprLoc());
7511         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7512       }
7513       return Size;
7514     }
7515 
7516     // Reference types are ignored for mapping purposes.
7517     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7518       ExprTy = RefTy->getPointeeType().getCanonicalType();
7519 
7520     // Given that an array section is considered a built-in type, we need to
7521     // do the calculation based on the length of the section instead of relying
7522     // on CGF.getTypeSize(E->getType()).
7523     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7524       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7525                             OAE->getBase()->IgnoreParenImpCasts())
7526                             .getCanonicalType();
7527 
7528       // If there is no length associated with the expression and lower bound is
7529       // not specified too, that means we are using the whole length of the
7530       // base.
7531       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7532           !OAE->getLowerBound())
7533         return CGF.getTypeSize(BaseTy);
7534 
7535       llvm::Value *ElemSize;
7536       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7537         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7538       } else {
7539         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7540         assert(ATy && "Expecting array type if not a pointer type.");
7541         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7542       }
7543 
7544       // If we don't have a length at this point, that is because we have an
7545       // array section with a single element.
7546       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7547         return ElemSize;
7548 
7549       if (const Expr *LenExpr = OAE->getLength()) {
7550         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7551         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7552                                              CGF.getContext().getSizeType(),
7553                                              LenExpr->getExprLoc());
7554         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7555       }
7556       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7557              OAE->getLowerBound() && "expected array_section[lb:].");
7558       // Size = sizetype - lb * elemtype;
7559       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7560       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7561       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7562                                        CGF.getContext().getSizeType(),
7563                                        OAE->getLowerBound()->getExprLoc());
7564       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7565       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7566       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7567       LengthVal = CGF.Builder.CreateSelect(
7568           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7569       return LengthVal;
7570     }
7571     return CGF.getTypeSize(ExprTy);
7572   }
7573 
7574   /// Return the corresponding bits for a given map clause modifier. Add
7575   /// a flag marking the map as a pointer if requested. Add a flag marking the
7576   /// map as the first one of a series of maps that relate to the same map
7577   /// expression.
7578   OpenMPOffloadMappingFlags getMapTypeBits(
7579       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7580       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7581       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7582     OpenMPOffloadMappingFlags Bits =
7583         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7584     switch (MapType) {
7585     case OMPC_MAP_alloc:
7586     case OMPC_MAP_release:
7587       // alloc and release is the default behavior in the runtime library,  i.e.
7588       // if we don't pass any bits alloc/release that is what the runtime is
7589       // going to do. Therefore, we don't need to signal anything for these two
7590       // type modifiers.
7591       break;
7592     case OMPC_MAP_to:
7593       Bits |= OMP_MAP_TO;
7594       break;
7595     case OMPC_MAP_from:
7596       Bits |= OMP_MAP_FROM;
7597       break;
7598     case OMPC_MAP_tofrom:
7599       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7600       break;
7601     case OMPC_MAP_delete:
7602       Bits |= OMP_MAP_DELETE;
7603       break;
7604     case OMPC_MAP_unknown:
7605       llvm_unreachable("Unexpected map type!");
7606     }
7607     if (AddPtrFlag)
7608       Bits |= OMP_MAP_PTR_AND_OBJ;
7609     if (AddIsTargetParamFlag)
7610       Bits |= OMP_MAP_TARGET_PARAM;
7611     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7612       Bits |= OMP_MAP_ALWAYS;
7613     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7614       Bits |= OMP_MAP_CLOSE;
7615     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7616         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7617       Bits |= OMP_MAP_PRESENT;
7618     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7619       Bits |= OMP_MAP_OMPX_HOLD;
7620     if (IsNonContiguous)
7621       Bits |= OMP_MAP_NON_CONTIG;
7622     return Bits;
7623   }
7624 
7625   /// Return true if the provided expression is a final array section. A
7626   /// final array section, is one whose length can't be proved to be one.
7627   bool isFinalArraySectionExpression(const Expr *E) const {
7628     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7629 
7630     // It is not an array section and therefore not a unity-size one.
7631     if (!OASE)
7632       return false;
7633 
7634     // An array section with no colon always refer to a single element.
7635     if (OASE->getColonLocFirst().isInvalid())
7636       return false;
7637 
7638     const Expr *Length = OASE->getLength();
7639 
7640     // If we don't have a length we have to check if the array has size 1
7641     // for this dimension. Also, we should always expect a length if the
7642     // base type is pointer.
7643     if (!Length) {
7644       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7645                              OASE->getBase()->IgnoreParenImpCasts())
7646                              .getCanonicalType();
7647       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7648         return ATy->getSize().getSExtValue() != 1;
7649       // If we don't have a constant dimension length, we have to consider
7650       // the current section as having any size, so it is not necessarily
7651       // unitary. If it happen to be unity size, that's user fault.
7652       return true;
7653     }
7654 
7655     // Check if the length evaluates to 1.
7656     Expr::EvalResult Result;
7657     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7658       return true; // Can have more that size 1.
7659 
7660     llvm::APSInt ConstLength = Result.Val.getInt();
7661     return ConstLength.getSExtValue() != 1;
7662   }
7663 
7664   /// Generate the base pointers, section pointers, sizes, map type bits, and
7665   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7666   /// map type, map or motion modifiers, and expression components.
7667   /// \a IsFirstComponent should be set to true if the provided set of
7668   /// components is the first associated with a capture.
7669   void generateInfoForComponentList(
7670       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7671       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7672       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7673       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7674       bool IsFirstComponentList, bool IsImplicit,
7675       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7676       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7677       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7678           OverlappedElements = llvm::None) const {
7679     // The following summarizes what has to be generated for each map and the
7680     // types below. The generated information is expressed in this order:
7681     // base pointer, section pointer, size, flags
7682     // (to add to the ones that come from the map type and modifier).
7683     //
7684     // double d;
7685     // int i[100];
7686     // float *p;
7687     //
7688     // struct S1 {
7689     //   int i;
7690     //   float f[50];
7691     // }
7692     // struct S2 {
7693     //   int i;
7694     //   float f[50];
7695     //   S1 s;
7696     //   double *p;
7697     //   struct S2 *ps;
7698     //   int &ref;
7699     // }
7700     // S2 s;
7701     // S2 *ps;
7702     //
7703     // map(d)
7704     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7705     //
7706     // map(i)
7707     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7708     //
7709     // map(i[1:23])
7710     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7711     //
7712     // map(p)
7713     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7714     //
7715     // map(p[1:24])
7716     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7717     // in unified shared memory mode or for local pointers
7718     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7719     //
7720     // map(s)
7721     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7722     //
7723     // map(s.i)
7724     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7725     //
7726     // map(s.s.f)
7727     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7728     //
7729     // map(s.p)
7730     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7731     //
7732     // map(to: s.p[:22])
7733     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7734     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7735     // &(s.p), &(s.p[0]), 22*sizeof(double),
7736     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7737     // (*) alloc space for struct members, only this is a target parameter
7738     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7739     //      optimizes this entry out, same in the examples below)
7740     // (***) map the pointee (map: to)
7741     //
7742     // map(to: s.ref)
7743     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7744     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7745     // (*) alloc space for struct members, only this is a target parameter
7746     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7747     //      optimizes this entry out, same in the examples below)
7748     // (***) map the pointee (map: to)
7749     //
7750     // map(s.ps)
7751     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7752     //
7753     // map(from: s.ps->s.i)
7754     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7755     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7756     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7757     //
7758     // map(to: s.ps->ps)
7759     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7760     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7761     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7762     //
7763     // map(s.ps->ps->ps)
7764     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7765     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7766     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7767     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7768     //
7769     // map(to: s.ps->ps->s.f[:22])
7770     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7771     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7772     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7773     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7774     //
7775     // map(ps)
7776     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7777     //
7778     // map(ps->i)
7779     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7780     //
7781     // map(ps->s.f)
7782     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7783     //
7784     // map(from: ps->p)
7785     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7786     //
7787     // map(to: ps->p[:22])
7788     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7789     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7790     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7791     //
7792     // map(ps->ps)
7793     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7794     //
7795     // map(from: ps->ps->s.i)
7796     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7797     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7798     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7799     //
7800     // map(from: ps->ps->ps)
7801     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7802     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7803     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7804     //
7805     // map(ps->ps->ps->ps)
7806     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7807     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7808     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7809     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7810     //
7811     // map(to: ps->ps->ps->s.f[:22])
7812     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7813     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7814     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7815     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7816     //
7817     // map(to: s.f[:22]) map(from: s.p[:33])
7818     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7819     //     sizeof(double*) (**), TARGET_PARAM
7820     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7821     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7822     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7823     // (*) allocate contiguous space needed to fit all mapped members even if
7824     //     we allocate space for members not mapped (in this example,
7825     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7826     //     them as well because they fall between &s.f[0] and &s.p)
7827     //
7828     // map(from: s.f[:22]) map(to: ps->p[:33])
7829     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7830     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7831     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7832     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7833     // (*) the struct this entry pertains to is the 2nd element in the list of
7834     //     arguments, hence MEMBER_OF(2)
7835     //
7836     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7837     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7838     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7839     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7840     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7841     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7842     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7843     // (*) the struct this entry pertains to is the 4th element in the list
7844     //     of arguments, hence MEMBER_OF(4)
7845 
7846     // Track if the map information being generated is the first for a capture.
7847     bool IsCaptureFirstInfo = IsFirstComponentList;
7848     // When the variable is on a declare target link or in a to clause with
7849     // unified memory, a reference is needed to hold the host/device address
7850     // of the variable.
7851     bool RequiresReference = false;
7852 
7853     // Scan the components from the base to the complete expression.
7854     auto CI = Components.rbegin();
7855     auto CE = Components.rend();
7856     auto I = CI;
7857 
7858     // Track if the map information being generated is the first for a list of
7859     // components.
7860     bool IsExpressionFirstInfo = true;
7861     bool FirstPointerInComplexData = false;
7862     Address BP = Address::invalid();
7863     const Expr *AssocExpr = I->getAssociatedExpression();
7864     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7865     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7866     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7867 
7868     if (isa<MemberExpr>(AssocExpr)) {
7869       // The base is the 'this' pointer. The content of the pointer is going
7870       // to be the base of the field being mapped.
7871       BP = CGF.LoadCXXThisAddress();
7872     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7873                (OASE &&
7874                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7875       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7876     } else if (OAShE &&
7877                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7878       BP = Address(
7879           CGF.EmitScalarExpr(OAShE->getBase()),
7880           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7881     } else {
7882       // The base is the reference to the variable.
7883       // BP = &Var.
7884       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7885       if (const auto *VD =
7886               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7887         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7888                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7889           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7890               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7891                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7892             RequiresReference = true;
7893             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7894           }
7895         }
7896       }
7897 
7898       // If the variable is a pointer and is being dereferenced (i.e. is not
7899       // the last component), the base has to be the pointer itself, not its
7900       // reference. References are ignored for mapping purposes.
7901       QualType Ty =
7902           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7903       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7904         // No need to generate individual map information for the pointer, it
7905         // can be associated with the combined storage if shared memory mode is
7906         // active or the base declaration is not global variable.
7907         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7908         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7909             !VD || VD->hasLocalStorage())
7910           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7911         else
7912           FirstPointerInComplexData = true;
7913         ++I;
7914       }
7915     }
7916 
7917     // Track whether a component of the list should be marked as MEMBER_OF some
7918     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7919     // in a component list should be marked as MEMBER_OF, all subsequent entries
7920     // do not belong to the base struct. E.g.
7921     // struct S2 s;
7922     // s.ps->ps->ps->f[:]
7923     //   (1) (2) (3) (4)
7924     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7925     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7926     // is the pointee of ps(2) which is not member of struct s, so it should not
7927     // be marked as such (it is still PTR_AND_OBJ).
7928     // The variable is initialized to false so that PTR_AND_OBJ entries which
7929     // are not struct members are not considered (e.g. array of pointers to
7930     // data).
7931     bool ShouldBeMemberOf = false;
7932 
7933     // Variable keeping track of whether or not we have encountered a component
7934     // in the component list which is a member expression. Useful when we have a
7935     // pointer or a final array section, in which case it is the previous
7936     // component in the list which tells us whether we have a member expression.
7937     // E.g. X.f[:]
7938     // While processing the final array section "[:]" it is "f" which tells us
7939     // whether we are dealing with a member of a declared struct.
7940     const MemberExpr *EncounteredME = nullptr;
7941 
7942     // Track for the total number of dimension. Start from one for the dummy
7943     // dimension.
7944     uint64_t DimSize = 1;
7945 
7946     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7947     bool IsPrevMemberReference = false;
7948 
7949     for (; I != CE; ++I) {
7950       // If the current component is member of a struct (parent struct) mark it.
7951       if (!EncounteredME) {
7952         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7953         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7954         // as MEMBER_OF the parent struct.
7955         if (EncounteredME) {
7956           ShouldBeMemberOf = true;
7957           // Do not emit as complex pointer if this is actually not array-like
7958           // expression.
7959           if (FirstPointerInComplexData) {
7960             QualType Ty = std::prev(I)
7961                               ->getAssociatedDeclaration()
7962                               ->getType()
7963                               .getNonReferenceType();
7964             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7965             FirstPointerInComplexData = false;
7966           }
7967         }
7968       }
7969 
7970       auto Next = std::next(I);
7971 
7972       // We need to generate the addresses and sizes if this is the last
7973       // component, if the component is a pointer or if it is an array section
7974       // whose length can't be proved to be one. If this is a pointer, it
7975       // becomes the base address for the following components.
7976 
7977       // A final array section, is one whose length can't be proved to be one.
7978       // If the map item is non-contiguous then we don't treat any array section
7979       // as final array section.
7980       bool IsFinalArraySection =
7981           !IsNonContiguous &&
7982           isFinalArraySectionExpression(I->getAssociatedExpression());
7983 
7984       // If we have a declaration for the mapping use that, otherwise use
7985       // the base declaration of the map clause.
7986       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7987                                      ? I->getAssociatedDeclaration()
7988                                      : BaseDecl;
7989       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7990                                                : MapExpr;
7991 
7992       // Get information on whether the element is a pointer. Have to do a
7993       // special treatment for array sections given that they are built-in
7994       // types.
7995       const auto *OASE =
7996           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7997       const auto *OAShE =
7998           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7999       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8000       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8001       bool IsPointer =
8002           OAShE ||
8003           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8004                        .getCanonicalType()
8005                        ->isAnyPointerType()) ||
8006           I->getAssociatedExpression()->getType()->isAnyPointerType();
8007       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8008                                MapDecl &&
8009                                MapDecl->getType()->isLValueReferenceType();
8010       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8011 
8012       if (OASE)
8013         ++DimSize;
8014 
8015       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8016           IsFinalArraySection) {
8017         // If this is not the last component, we expect the pointer to be
8018         // associated with an array expression or member expression.
8019         assert((Next == CE ||
8020                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8021                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8022                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8023                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8024                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8025                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8026                "Unexpected expression");
8027 
8028         Address LB = Address::invalid();
8029         Address LowestElem = Address::invalid();
8030         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8031                                        const MemberExpr *E) {
8032           const Expr *BaseExpr = E->getBase();
8033           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8034           // scalar.
8035           LValue BaseLV;
8036           if (E->isArrow()) {
8037             LValueBaseInfo BaseInfo;
8038             TBAAAccessInfo TBAAInfo;
8039             Address Addr =
8040                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8041             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8042             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8043           } else {
8044             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8045           }
8046           return BaseLV;
8047         };
8048         if (OAShE) {
8049           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8050                                     CGF.getContext().getTypeAlignInChars(
8051                                         OAShE->getBase()->getType()));
8052         } else if (IsMemberReference) {
8053           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8054           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8055           LowestElem = CGF.EmitLValueForFieldInitialization(
8056                               BaseLVal, cast<FieldDecl>(MapDecl))
8057                            .getAddress(CGF);
8058           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8059                    .getAddress(CGF);
8060         } else {
8061           LowestElem = LB =
8062               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8063                   .getAddress(CGF);
8064         }
8065 
8066         // If this component is a pointer inside the base struct then we don't
8067         // need to create any entry for it - it will be combined with the object
8068         // it is pointing to into a single PTR_AND_OBJ entry.
8069         bool IsMemberPointerOrAddr =
8070             EncounteredME &&
8071             (((IsPointer || ForDeviceAddr) &&
8072               I->getAssociatedExpression() == EncounteredME) ||
8073              (IsPrevMemberReference && !IsPointer) ||
8074              (IsMemberReference && Next != CE &&
8075               !Next->getAssociatedExpression()->getType()->isPointerType()));
8076         if (!OverlappedElements.empty() && Next == CE) {
8077           // Handle base element with the info for overlapped elements.
8078           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8079           assert(!IsPointer &&
8080                  "Unexpected base element with the pointer type.");
8081           // Mark the whole struct as the struct that requires allocation on the
8082           // device.
8083           PartialStruct.LowestElem = {0, LowestElem};
8084           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8085               I->getAssociatedExpression()->getType());
8086           Address HB = CGF.Builder.CreateConstGEP(
8087               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8088                                                               CGF.VoidPtrTy),
8089               TypeSize.getQuantity() - 1);
8090           PartialStruct.HighestElem = {
8091               std::numeric_limits<decltype(
8092                   PartialStruct.HighestElem.first)>::max(),
8093               HB};
8094           PartialStruct.Base = BP;
8095           PartialStruct.LB = LB;
8096           assert(
8097               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8098               "Overlapped elements must be used only once for the variable.");
8099           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8100           // Emit data for non-overlapped data.
8101           OpenMPOffloadMappingFlags Flags =
8102               OMP_MAP_MEMBER_OF |
8103               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8104                              /*AddPtrFlag=*/false,
8105                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8106           llvm::Value *Size = nullptr;
8107           // Do bitcopy of all non-overlapped structure elements.
8108           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8109                    Component : OverlappedElements) {
8110             Address ComponentLB = Address::invalid();
8111             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8112                  Component) {
8113               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8114                 const auto *FD = dyn_cast<FieldDecl>(VD);
8115                 if (FD && FD->getType()->isLValueReferenceType()) {
8116                   const auto *ME =
8117                       cast<MemberExpr>(MC.getAssociatedExpression());
8118                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8119                   ComponentLB =
8120                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8121                           .getAddress(CGF);
8122                 } else {
8123                   ComponentLB =
8124                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8125                           .getAddress(CGF);
8126                 }
8127                 Size = CGF.Builder.CreatePtrDiff(
8128                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8129                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8130                 break;
8131               }
8132             }
8133             assert(Size && "Failed to determine structure size");
8134             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8135             CombinedInfo.BasePointers.push_back(BP.getPointer());
8136             CombinedInfo.Pointers.push_back(LB.getPointer());
8137             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8138                 Size, CGF.Int64Ty, /*isSigned=*/true));
8139             CombinedInfo.Types.push_back(Flags);
8140             CombinedInfo.Mappers.push_back(nullptr);
8141             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8142                                                                       : 1);
8143             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8144           }
8145           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8146           CombinedInfo.BasePointers.push_back(BP.getPointer());
8147           CombinedInfo.Pointers.push_back(LB.getPointer());
8148           Size = CGF.Builder.CreatePtrDiff(
8149               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8150               CGF.EmitCastToVoidPtr(LB.getPointer()));
8151           CombinedInfo.Sizes.push_back(
8152               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8153           CombinedInfo.Types.push_back(Flags);
8154           CombinedInfo.Mappers.push_back(nullptr);
8155           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8156                                                                     : 1);
8157           break;
8158         }
8159         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8160         if (!IsMemberPointerOrAddr ||
8161             (Next == CE && MapType != OMPC_MAP_unknown)) {
8162           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8163           CombinedInfo.BasePointers.push_back(BP.getPointer());
8164           CombinedInfo.Pointers.push_back(LB.getPointer());
8165           CombinedInfo.Sizes.push_back(
8166               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8167           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8168                                                                     : 1);
8169 
8170           // If Mapper is valid, the last component inherits the mapper.
8171           bool HasMapper = Mapper && Next == CE;
8172           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8173 
8174           // We need to add a pointer flag for each map that comes from the
8175           // same expression except for the first one. We also need to signal
8176           // this map is the first one that relates with the current capture
8177           // (there is a set of entries for each capture).
8178           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8179               MapType, MapModifiers, MotionModifiers, IsImplicit,
8180               !IsExpressionFirstInfo || RequiresReference ||
8181                   FirstPointerInComplexData || IsMemberReference,
8182               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8183 
8184           if (!IsExpressionFirstInfo || IsMemberReference) {
8185             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8186             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8187             if (IsPointer || (IsMemberReference && Next != CE))
8188               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8189                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8190 
8191             if (ShouldBeMemberOf) {
8192               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8193               // should be later updated with the correct value of MEMBER_OF.
8194               Flags |= OMP_MAP_MEMBER_OF;
8195               // From now on, all subsequent PTR_AND_OBJ entries should not be
8196               // marked as MEMBER_OF.
8197               ShouldBeMemberOf = false;
8198             }
8199           }
8200 
8201           CombinedInfo.Types.push_back(Flags);
8202         }
8203 
8204         // If we have encountered a member expression so far, keep track of the
8205         // mapped member. If the parent is "*this", then the value declaration
8206         // is nullptr.
8207         if (EncounteredME) {
8208           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8209           unsigned FieldIndex = FD->getFieldIndex();
8210 
8211           // Update info about the lowest and highest elements for this struct
8212           if (!PartialStruct.Base.isValid()) {
8213             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8214             if (IsFinalArraySection) {
8215               Address HB =
8216                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8217                       .getAddress(CGF);
8218               PartialStruct.HighestElem = {FieldIndex, HB};
8219             } else {
8220               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8221             }
8222             PartialStruct.Base = BP;
8223             PartialStruct.LB = BP;
8224           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8225             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8226           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8227             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8228           }
8229         }
8230 
8231         // Need to emit combined struct for array sections.
8232         if (IsFinalArraySection || IsNonContiguous)
8233           PartialStruct.IsArraySection = true;
8234 
8235         // If we have a final array section, we are done with this expression.
8236         if (IsFinalArraySection)
8237           break;
8238 
8239         // The pointer becomes the base for the next element.
8240         if (Next != CE)
8241           BP = IsMemberReference ? LowestElem : LB;
8242 
8243         IsExpressionFirstInfo = false;
8244         IsCaptureFirstInfo = false;
8245         FirstPointerInComplexData = false;
8246         IsPrevMemberReference = IsMemberReference;
8247       } else if (FirstPointerInComplexData) {
8248         QualType Ty = Components.rbegin()
8249                           ->getAssociatedDeclaration()
8250                           ->getType()
8251                           .getNonReferenceType();
8252         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8253         FirstPointerInComplexData = false;
8254       }
8255     }
8256     // If ran into the whole component - allocate the space for the whole
8257     // record.
8258     if (!EncounteredME)
8259       PartialStruct.HasCompleteRecord = true;
8260 
8261     if (!IsNonContiguous)
8262       return;
8263 
8264     const ASTContext &Context = CGF.getContext();
8265 
8266     // For supporting stride in array section, we need to initialize the first
8267     // dimension size as 1, first offset as 0, and first count as 1
8268     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8269     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8270     MapValuesArrayTy CurStrides;
8271     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8272     uint64_t ElementTypeSize;
8273 
8274     // Collect Size information for each dimension and get the element size as
8275     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8276     // should be [10, 10] and the first stride is 4 btyes.
8277     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8278          Components) {
8279       const Expr *AssocExpr = Component.getAssociatedExpression();
8280       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8281 
8282       if (!OASE)
8283         continue;
8284 
8285       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8286       auto *CAT = Context.getAsConstantArrayType(Ty);
8287       auto *VAT = Context.getAsVariableArrayType(Ty);
8288 
8289       // We need all the dimension size except for the last dimension.
8290       assert((VAT || CAT || &Component == &*Components.begin()) &&
8291              "Should be either ConstantArray or VariableArray if not the "
8292              "first Component");
8293 
8294       // Get element size if CurStrides is empty.
8295       if (CurStrides.empty()) {
8296         const Type *ElementType = nullptr;
8297         if (CAT)
8298           ElementType = CAT->getElementType().getTypePtr();
8299         else if (VAT)
8300           ElementType = VAT->getElementType().getTypePtr();
8301         else
8302           assert(&Component == &*Components.begin() &&
8303                  "Only expect pointer (non CAT or VAT) when this is the "
8304                  "first Component");
8305         // If ElementType is null, then it means the base is a pointer
8306         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8307         // for next iteration.
8308         if (ElementType) {
8309           // For the case that having pointer as base, we need to remove one
8310           // level of indirection.
8311           if (&Component != &*Components.begin())
8312             ElementType = ElementType->getPointeeOrArrayElementType();
8313           ElementTypeSize =
8314               Context.getTypeSizeInChars(ElementType).getQuantity();
8315           CurStrides.push_back(
8316               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8317         }
8318       }
8319       // Get dimension value except for the last dimension since we don't need
8320       // it.
8321       if (DimSizes.size() < Components.size() - 1) {
8322         if (CAT)
8323           DimSizes.push_back(llvm::ConstantInt::get(
8324               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8325         else if (VAT)
8326           DimSizes.push_back(CGF.Builder.CreateIntCast(
8327               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8328               /*IsSigned=*/false));
8329       }
8330     }
8331 
8332     // Skip the dummy dimension since we have already have its information.
8333     auto DI = DimSizes.begin() + 1;
8334     // Product of dimension.
8335     llvm::Value *DimProd =
8336         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8337 
8338     // Collect info for non-contiguous. Notice that offset, count, and stride
8339     // are only meaningful for array-section, so we insert a null for anything
8340     // other than array-section.
8341     // Also, the size of offset, count, and stride are not the same as
8342     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8343     // count, and stride are the same as the number of non-contiguous
8344     // declaration in target update to/from clause.
8345     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8346          Components) {
8347       const Expr *AssocExpr = Component.getAssociatedExpression();
8348 
8349       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8350         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8351             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8352             /*isSigned=*/false);
8353         CurOffsets.push_back(Offset);
8354         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8355         CurStrides.push_back(CurStrides.back());
8356         continue;
8357       }
8358 
8359       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8360 
8361       if (!OASE)
8362         continue;
8363 
8364       // Offset
8365       const Expr *OffsetExpr = OASE->getLowerBound();
8366       llvm::Value *Offset = nullptr;
8367       if (!OffsetExpr) {
8368         // If offset is absent, then we just set it to zero.
8369         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8370       } else {
8371         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8372                                            CGF.Int64Ty,
8373                                            /*isSigned=*/false);
8374       }
8375       CurOffsets.push_back(Offset);
8376 
8377       // Count
8378       const Expr *CountExpr = OASE->getLength();
8379       llvm::Value *Count = nullptr;
8380       if (!CountExpr) {
8381         // In Clang, once a high dimension is an array section, we construct all
8382         // the lower dimension as array section, however, for case like
8383         // arr[0:2][2], Clang construct the inner dimension as an array section
8384         // but it actually is not in an array section form according to spec.
8385         if (!OASE->getColonLocFirst().isValid() &&
8386             !OASE->getColonLocSecond().isValid()) {
8387           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8388         } else {
8389           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8390           // When the length is absent it defaults to ⌈(size −
8391           // lower-bound)/stride⌉, where size is the size of the array
8392           // dimension.
8393           const Expr *StrideExpr = OASE->getStride();
8394           llvm::Value *Stride =
8395               StrideExpr
8396                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8397                                               CGF.Int64Ty, /*isSigned=*/false)
8398                   : nullptr;
8399           if (Stride)
8400             Count = CGF.Builder.CreateUDiv(
8401                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8402           else
8403             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8404         }
8405       } else {
8406         Count = CGF.EmitScalarExpr(CountExpr);
8407       }
8408       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8409       CurCounts.push_back(Count);
8410 
8411       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8412       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8413       //              Offset      Count     Stride
8414       //    D0          0           1         4    (int)    <- dummy dimension
8415       //    D1          0           2         8    (2 * (1) * 4)
8416       //    D2          1           2         20   (1 * (1 * 5) * 4)
8417       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8418       const Expr *StrideExpr = OASE->getStride();
8419       llvm::Value *Stride =
8420           StrideExpr
8421               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8422                                           CGF.Int64Ty, /*isSigned=*/false)
8423               : nullptr;
8424       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8425       if (Stride)
8426         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8427       else
8428         CurStrides.push_back(DimProd);
8429       if (DI != DimSizes.end())
8430         ++DI;
8431     }
8432 
8433     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8434     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8435     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8436   }
8437 
8438   /// Return the adjusted map modifiers if the declaration a capture refers to
8439   /// appears in a first-private clause. This is expected to be used only with
8440   /// directives that start with 'target'.
8441   MappableExprsHandler::OpenMPOffloadMappingFlags
8442   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8443     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8444 
8445     // A first private variable captured by reference will use only the
8446     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8447     // declaration is known as first-private in this handler.
8448     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8449       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8450         return MappableExprsHandler::OMP_MAP_TO |
8451                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8452       return MappableExprsHandler::OMP_MAP_PRIVATE |
8453              MappableExprsHandler::OMP_MAP_TO;
8454     }
8455     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8456     if (I != LambdasMap.end())
8457       // for map(to: lambda): using user specified map type.
8458       return getMapTypeBits(
8459           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8460           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8461           /*AddPtrFlag=*/false,
8462           /*AddIsTargetParamFlag=*/false,
8463           /*isNonContiguous=*/false);
8464     return MappableExprsHandler::OMP_MAP_TO |
8465            MappableExprsHandler::OMP_MAP_FROM;
8466   }
8467 
8468   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8469     // Rotate by getFlagMemberOffset() bits.
8470     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8471                                                   << getFlagMemberOffset());
8472   }
8473 
8474   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8475                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8476     // If the entry is PTR_AND_OBJ but has not been marked with the special
8477     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8478     // marked as MEMBER_OF.
8479     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8480         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8481       return;
8482 
8483     // Reset the placeholder value to prepare the flag for the assignment of the
8484     // proper MEMBER_OF value.
8485     Flags &= ~OMP_MAP_MEMBER_OF;
8486     Flags |= MemberOfFlag;
8487   }
8488 
8489   void getPlainLayout(const CXXRecordDecl *RD,
8490                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8491                       bool AsBase) const {
8492     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8493 
8494     llvm::StructType *St =
8495         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8496 
8497     unsigned NumElements = St->getNumElements();
8498     llvm::SmallVector<
8499         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8500         RecordLayout(NumElements);
8501 
8502     // Fill bases.
8503     for (const auto &I : RD->bases()) {
8504       if (I.isVirtual())
8505         continue;
8506       const auto *Base = I.getType()->getAsCXXRecordDecl();
8507       // Ignore empty bases.
8508       if (Base->isEmpty() || CGF.getContext()
8509                                  .getASTRecordLayout(Base)
8510                                  .getNonVirtualSize()
8511                                  .isZero())
8512         continue;
8513 
8514       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8515       RecordLayout[FieldIndex] = Base;
8516     }
8517     // Fill in virtual bases.
8518     for (const auto &I : RD->vbases()) {
8519       const auto *Base = I.getType()->getAsCXXRecordDecl();
8520       // Ignore empty bases.
8521       if (Base->isEmpty())
8522         continue;
8523       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8524       if (RecordLayout[FieldIndex])
8525         continue;
8526       RecordLayout[FieldIndex] = Base;
8527     }
8528     // Fill in all the fields.
8529     assert(!RD->isUnion() && "Unexpected union.");
8530     for (const auto *Field : RD->fields()) {
8531       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8532       // will fill in later.)
8533       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8534         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8535         RecordLayout[FieldIndex] = Field;
8536       }
8537     }
8538     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8539              &Data : RecordLayout) {
8540       if (Data.isNull())
8541         continue;
8542       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8543         getPlainLayout(Base, Layout, /*AsBase=*/true);
8544       else
8545         Layout.push_back(Data.get<const FieldDecl *>());
8546     }
8547   }
8548 
8549   /// Generate all the base pointers, section pointers, sizes, map types, and
8550   /// mappers for the extracted mappable expressions (all included in \a
8551   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8552   /// pair of the relevant declaration and index where it occurs is appended to
8553   /// the device pointers info array.
8554   void generateAllInfoForClauses(
8555       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8556       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8557           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8558     // We have to process the component lists that relate with the same
8559     // declaration in a single chunk so that we can generate the map flags
8560     // correctly. Therefore, we organize all lists in a map.
8561     enum MapKind { Present, Allocs, Other, Total };
8562     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8563                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8564         Info;
8565 
8566     // Helper function to fill the information map for the different supported
8567     // clauses.
8568     auto &&InfoGen =
8569         [&Info, &SkipVarSet](
8570             const ValueDecl *D, MapKind Kind,
8571             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8572             OpenMPMapClauseKind MapType,
8573             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8574             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8575             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8576             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8577           if (SkipVarSet.contains(D))
8578             return;
8579           auto It = Info.find(D);
8580           if (It == Info.end())
8581             It = Info
8582                      .insert(std::make_pair(
8583                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8584                      .first;
8585           It->second[Kind].emplace_back(
8586               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8587               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8588         };
8589 
8590     for (const auto *Cl : Clauses) {
8591       const auto *C = dyn_cast<OMPMapClause>(Cl);
8592       if (!C)
8593         continue;
8594       MapKind Kind = Other;
8595       if (llvm::is_contained(C->getMapTypeModifiers(),
8596                              OMPC_MAP_MODIFIER_present))
8597         Kind = Present;
8598       else if (C->getMapType() == OMPC_MAP_alloc)
8599         Kind = Allocs;
8600       const auto *EI = C->getVarRefs().begin();
8601       for (const auto L : C->component_lists()) {
8602         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8603         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8604                 C->getMapTypeModifiers(), llvm::None,
8605                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8606                 E);
8607         ++EI;
8608       }
8609     }
8610     for (const auto *Cl : Clauses) {
8611       const auto *C = dyn_cast<OMPToClause>(Cl);
8612       if (!C)
8613         continue;
8614       MapKind Kind = Other;
8615       if (llvm::is_contained(C->getMotionModifiers(),
8616                              OMPC_MOTION_MODIFIER_present))
8617         Kind = Present;
8618       const auto *EI = C->getVarRefs().begin();
8619       for (const auto L : C->component_lists()) {
8620         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8621                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8622                 C->isImplicit(), std::get<2>(L), *EI);
8623         ++EI;
8624       }
8625     }
8626     for (const auto *Cl : Clauses) {
8627       const auto *C = dyn_cast<OMPFromClause>(Cl);
8628       if (!C)
8629         continue;
8630       MapKind Kind = Other;
8631       if (llvm::is_contained(C->getMotionModifiers(),
8632                              OMPC_MOTION_MODIFIER_present))
8633         Kind = Present;
8634       const auto *EI = C->getVarRefs().begin();
8635       for (const auto L : C->component_lists()) {
8636         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8637                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8638                 C->isImplicit(), std::get<2>(L), *EI);
8639         ++EI;
8640       }
8641     }
8642 
8643     // Look at the use_device_ptr clause information and mark the existing map
8644     // entries as such. If there is no map information for an entry in the
8645     // use_device_ptr list, we create one with map type 'alloc' and zero size
8646     // section. It is the user fault if that was not mapped before. If there is
8647     // no map information and the pointer is a struct member, then we defer the
8648     // emission of that entry until the whole struct has been processed.
8649     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8650                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8651         DeferredInfo;
8652     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8653 
8654     for (const auto *Cl : Clauses) {
8655       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8656       if (!C)
8657         continue;
8658       for (const auto L : C->component_lists()) {
8659         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8660             std::get<1>(L);
8661         assert(!Components.empty() &&
8662                "Not expecting empty list of components!");
8663         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8664         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8665         const Expr *IE = Components.back().getAssociatedExpression();
8666         // If the first component is a member expression, we have to look into
8667         // 'this', which maps to null in the map of map information. Otherwise
8668         // look directly for the information.
8669         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8670 
8671         // We potentially have map information for this declaration already.
8672         // Look for the first set of components that refer to it.
8673         if (It != Info.end()) {
8674           bool Found = false;
8675           for (auto &Data : It->second) {
8676             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8677               return MI.Components.back().getAssociatedDeclaration() == VD;
8678             });
8679             // If we found a map entry, signal that the pointer has to be
8680             // returned and move on to the next declaration. Exclude cases where
8681             // the base pointer is mapped as array subscript, array section or
8682             // array shaping. The base address is passed as a pointer to base in
8683             // this case and cannot be used as a base for use_device_ptr list
8684             // item.
8685             if (CI != Data.end()) {
8686               auto PrevCI = std::next(CI->Components.rbegin());
8687               const auto *VarD = dyn_cast<VarDecl>(VD);
8688               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8689                   isa<MemberExpr>(IE) ||
8690                   !VD->getType().getNonReferenceType()->isPointerType() ||
8691                   PrevCI == CI->Components.rend() ||
8692                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8693                   VarD->hasLocalStorage()) {
8694                 CI->ReturnDevicePointer = true;
8695                 Found = true;
8696                 break;
8697               }
8698             }
8699           }
8700           if (Found)
8701             continue;
8702         }
8703 
8704         // We didn't find any match in our map information - generate a zero
8705         // size array section - if the pointer is a struct member we defer this
8706         // action until the whole struct has been processed.
8707         if (isa<MemberExpr>(IE)) {
8708           // Insert the pointer into Info to be processed by
8709           // generateInfoForComponentList. Because it is a member pointer
8710           // without a pointee, no entry will be generated for it, therefore
8711           // we need to generate one after the whole struct has been processed.
8712           // Nonetheless, generateInfoForComponentList must be called to take
8713           // the pointer into account for the calculation of the range of the
8714           // partial struct.
8715           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8716                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8717                   nullptr);
8718           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8719         } else {
8720           llvm::Value *Ptr =
8721               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8722           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8723           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8724           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8725           UseDevicePtrCombinedInfo.Sizes.push_back(
8726               llvm::Constant::getNullValue(CGF.Int64Ty));
8727           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8728           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8729         }
8730       }
8731     }
8732 
8733     // Look at the use_device_addr clause information and mark the existing map
8734     // entries as such. If there is no map information for an entry in the
8735     // use_device_addr list, we create one with map type 'alloc' and zero size
8736     // section. It is the user fault if that was not mapped before. If there is
8737     // no map information and the pointer is a struct member, then we defer the
8738     // emission of that entry until the whole struct has been processed.
8739     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8740     for (const auto *Cl : Clauses) {
8741       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8742       if (!C)
8743         continue;
8744       for (const auto L : C->component_lists()) {
8745         assert(!std::get<1>(L).empty() &&
8746                "Not expecting empty list of components!");
8747         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8748         if (!Processed.insert(VD).second)
8749           continue;
8750         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8751         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8752         // If the first component is a member expression, we have to look into
8753         // 'this', which maps to null in the map of map information. Otherwise
8754         // look directly for the information.
8755         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8756 
8757         // We potentially have map information for this declaration already.
8758         // Look for the first set of components that refer to it.
8759         if (It != Info.end()) {
8760           bool Found = false;
8761           for (auto &Data : It->second) {
8762             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8763               return MI.Components.back().getAssociatedDeclaration() == VD;
8764             });
8765             // If we found a map entry, signal that the pointer has to be
8766             // returned and move on to the next declaration.
8767             if (CI != Data.end()) {
8768               CI->ReturnDevicePointer = true;
8769               Found = true;
8770               break;
8771             }
8772           }
8773           if (Found)
8774             continue;
8775         }
8776 
8777         // We didn't find any match in our map information - generate a zero
8778         // size array section - if the pointer is a struct member we defer this
8779         // action until the whole struct has been processed.
8780         if (isa<MemberExpr>(IE)) {
8781           // Insert the pointer into Info to be processed by
8782           // generateInfoForComponentList. Because it is a member pointer
8783           // without a pointee, no entry will be generated for it, therefore
8784           // we need to generate one after the whole struct has been processed.
8785           // Nonetheless, generateInfoForComponentList must be called to take
8786           // the pointer into account for the calculation of the range of the
8787           // partial struct.
8788           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8789                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8790                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8791           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8792         } else {
8793           llvm::Value *Ptr;
8794           if (IE->isGLValue())
8795             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8796           else
8797             Ptr = CGF.EmitScalarExpr(IE);
8798           CombinedInfo.Exprs.push_back(VD);
8799           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8800           CombinedInfo.Pointers.push_back(Ptr);
8801           CombinedInfo.Sizes.push_back(
8802               llvm::Constant::getNullValue(CGF.Int64Ty));
8803           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8804           CombinedInfo.Mappers.push_back(nullptr);
8805         }
8806       }
8807     }
8808 
8809     for (const auto &Data : Info) {
8810       StructRangeInfoTy PartialStruct;
8811       // Temporary generated information.
8812       MapCombinedInfoTy CurInfo;
8813       const Decl *D = Data.first;
8814       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8815       for (const auto &M : Data.second) {
8816         for (const MapInfo &L : M) {
8817           assert(!L.Components.empty() &&
8818                  "Not expecting declaration with no component lists.");
8819 
8820           // Remember the current base pointer index.
8821           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8822           CurInfo.NonContigInfo.IsNonContiguous =
8823               L.Components.back().isNonContiguous();
8824           generateInfoForComponentList(
8825               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8826               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8827               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8828 
8829           // If this entry relates with a device pointer, set the relevant
8830           // declaration and add the 'return pointer' flag.
8831           if (L.ReturnDevicePointer) {
8832             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8833                    "Unexpected number of mapped base pointers.");
8834 
8835             const ValueDecl *RelevantVD =
8836                 L.Components.back().getAssociatedDeclaration();
8837             assert(RelevantVD &&
8838                    "No relevant declaration related with device pointer??");
8839 
8840             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8841                 RelevantVD);
8842             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8843           }
8844         }
8845       }
8846 
8847       // Append any pending zero-length pointers which are struct members and
8848       // used with use_device_ptr or use_device_addr.
8849       auto CI = DeferredInfo.find(Data.first);
8850       if (CI != DeferredInfo.end()) {
8851         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8852           llvm::Value *BasePtr;
8853           llvm::Value *Ptr;
8854           if (L.ForDeviceAddr) {
8855             if (L.IE->isGLValue())
8856               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8857             else
8858               Ptr = this->CGF.EmitScalarExpr(L.IE);
8859             BasePtr = Ptr;
8860             // Entry is RETURN_PARAM. Also, set the placeholder value
8861             // MEMBER_OF=FFFF so that the entry is later updated with the
8862             // correct value of MEMBER_OF.
8863             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8864           } else {
8865             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8866             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8867                                              L.IE->getExprLoc());
8868             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8869             // placeholder value MEMBER_OF=FFFF so that the entry is later
8870             // updated with the correct value of MEMBER_OF.
8871             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8872                                     OMP_MAP_MEMBER_OF);
8873           }
8874           CurInfo.Exprs.push_back(L.VD);
8875           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8876           CurInfo.Pointers.push_back(Ptr);
8877           CurInfo.Sizes.push_back(
8878               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8879           CurInfo.Mappers.push_back(nullptr);
8880         }
8881       }
8882       // If there is an entry in PartialStruct it means we have a struct with
8883       // individual members mapped. Emit an extra combined entry.
8884       if (PartialStruct.Base.isValid()) {
8885         CurInfo.NonContigInfo.Dims.push_back(0);
8886         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8887       }
8888 
8889       // We need to append the results of this capture to what we already
8890       // have.
8891       CombinedInfo.append(CurInfo);
8892     }
8893     // Append data for use_device_ptr clauses.
8894     CombinedInfo.append(UseDevicePtrCombinedInfo);
8895   }
8896 
8897 public:
8898   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8899       : CurDir(&Dir), CGF(CGF) {
8900     // Extract firstprivate clause information.
8901     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8902       for (const auto *D : C->varlists())
8903         FirstPrivateDecls.try_emplace(
8904             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8905     // Extract implicit firstprivates from uses_allocators clauses.
8906     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8907       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8908         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8909         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8910           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8911                                         /*Implicit=*/true);
8912         else if (const auto *VD = dyn_cast<VarDecl>(
8913                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8914                          ->getDecl()))
8915           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8916       }
8917     }
8918     // Extract device pointer clause information.
8919     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8920       for (auto L : C->component_lists())
8921         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8922     // Extract map information.
8923     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8924       if (C->getMapType() != OMPC_MAP_to)
8925         continue;
8926       for (auto L : C->component_lists()) {
8927         const ValueDecl *VD = std::get<0>(L);
8928         const auto *RD = VD ? VD->getType()
8929                                   .getCanonicalType()
8930                                   .getNonReferenceType()
8931                                   ->getAsCXXRecordDecl()
8932                             : nullptr;
8933         if (RD && RD->isLambda())
8934           LambdasMap.try_emplace(std::get<0>(L), C);
8935       }
8936     }
8937   }
8938 
  /// Constructor for the declare mapper directive. Unlike the constructor for
  /// executable directives, no clause information is extracted up front here;
  /// the mapper's clauses are processed later via generateAllInfoForMapper.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8942 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Map info the combined (parent) entry is appended to.
  /// \param CurTypes Map flags of the entries already generated for the
  ///        individual members; updated in place (TARGET_PARAM removal,
  ///        OMPX_HOLD propagation, MEMBER_OF index).
  /// \param PartialStruct Base address and lowest/highest mapped element of
  ///        the partially mapped struct.
  /// \param VD The mapped declaration, if any, recorded for the new entry.
  /// \param NotTargetParams If false, the combined entry is flagged as a
  ///        TARGET_PARAM (i.e. passed as a kernel argument).
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not MEMBER_OF anything and is not an array
    // section does not need a combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a complete record, use the record base for both ends; the +1 GEP
    // below then spans the whole record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element),
    // computed as an i8 pointer difference and zero-extended to 64 bits.
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element; the combined entry
    // added above takes over that role.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
9014 
9015   /// Generate all the base pointers, section pointers, sizes, map types, and
9016   /// mappers for the extracted mappable expressions (all included in \a
9017   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9018   /// pair of the relevant declaration and index where it occurs is appended to
9019   /// the device pointers info array.
9020   void generateAllInfo(
9021       MapCombinedInfoTy &CombinedInfo,
9022       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9023           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9024     assert(CurDir.is<const OMPExecutableDirective *>() &&
9025            "Expect a executable directive");
9026     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9027     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9028   }
9029 
9030   /// Generate all the base pointers, section pointers, sizes, map types, and
9031   /// mappers for the extracted map clauses of user-defined mapper (all included
9032   /// in \a CombinedInfo).
9033   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9034     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9035            "Expect a declare mapper directive");
9036     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9037     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9038   }
9039 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is a lambda object, append one PTR_AND_OBJ entry for the
  /// captured 'this' (if present) and one per capture that is by-reference or
  /// has pointer type. \p Arg is the host address of the lambda object.
  /// \p LambdaPointers records, for each emitted field address, the address
  /// of the owning lambda so adjustMemberOfForLambdaCaptures can later patch
  /// the MEMBER_OF index of these entries.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda records are of interest; anything else is ignored.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Map each captured variable to the closure field that stores it.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Emit a pointer-sized PTR_AND_OBJ entry for the captured 'this'.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: inner VD intentionally shadows the lambda's VD parameter here.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need an entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy capture of a pointer: map the loaded pointer value with a
        // zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9106 
9107   /// Set correct indices for lambdas captures.
9108   void adjustMemberOfForLambdaCaptures(
9109       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9110       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9111       MapFlagsArrayTy &Types) const {
9112     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9113       // Set correct member_of idx for all implicit lambda captures.
9114       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9115                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9116         continue;
9117       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9118       assert(BasePtr && "Unable to find base lambda address.");
9119       int TgtIdx = -1;
9120       for (unsigned J = I; J > 0; --J) {
9121         unsigned Idx = J - 1;
9122         if (Pointers[Idx] != BasePtr)
9123           continue;
9124         TgtIdx = Idx;
9125         break;
9126       }
9127       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9128       // All other current entries will be MEMBER_OF the combined entry
9129       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9130       // 0xFFFF in the MEMBER_OF field).
9131       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9132       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9133     }
9134   }
9135 
9136   /// Generate the base pointers, section pointers, sizes, map types, and
9137   /// mappers associated to a given capture (all included in \a CombinedInfo).
9138   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9139                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9140                               StructRangeInfoTy &PartialStruct) const {
9141     assert(!Cap->capturesVariableArrayType() &&
9142            "Not expecting to generate map info for a variable array type!");
9143 
9144     // We need to know when we generating information for the first component
9145     const ValueDecl *VD = Cap->capturesThis()
9146                               ? nullptr
9147                               : Cap->getCapturedVar()->getCanonicalDecl();
9148 
9149     // for map(to: lambda): skip here, processing it in
9150     // generateDefaultMapInfo
9151     if (LambdasMap.count(VD))
9152       return;
9153 
9154     // If this declaration appears in a is_device_ptr clause we just have to
9155     // pass the pointer by value. If it is a reference to a declaration, we just
9156     // pass its value.
9157     if (DevPointersMap.count(VD)) {
9158       CombinedInfo.Exprs.push_back(VD);
9159       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9160       CombinedInfo.Pointers.push_back(Arg);
9161       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9162           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9163           /*isSigned=*/true));
9164       CombinedInfo.Types.push_back(
9165           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9166           OMP_MAP_TARGET_PARAM);
9167       CombinedInfo.Mappers.push_back(nullptr);
9168       return;
9169     }
9170 
9171     using MapData =
9172         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9173                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9174                    const ValueDecl *, const Expr *>;
9175     SmallVector<MapData, 4> DeclComponentLists;
9176     assert(CurDir.is<const OMPExecutableDirective *>() &&
9177            "Expect a executable directive");
9178     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9179     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9180       const auto *EI = C->getVarRefs().begin();
9181       for (const auto L : C->decl_component_lists(VD)) {
9182         const ValueDecl *VDecl, *Mapper;
9183         // The Expression is not correct if the mapping is implicit
9184         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9185         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9186         std::tie(VDecl, Components, Mapper) = L;
9187         assert(VDecl == VD && "We got information for the wrong declaration??");
9188         assert(!Components.empty() &&
9189                "Not expecting declaration with no component lists.");
9190         DeclComponentLists.emplace_back(Components, C->getMapType(),
9191                                         C->getMapTypeModifiers(),
9192                                         C->isImplicit(), Mapper, E);
9193         ++EI;
9194       }
9195     }
9196     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9197                                              const MapData &RHS) {
9198       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9199       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9200       bool HasPresent =
9201           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9202       bool HasAllocs = MapType == OMPC_MAP_alloc;
9203       MapModifiers = std::get<2>(RHS);
9204       MapType = std::get<1>(LHS);
9205       bool HasPresentR =
9206           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9207       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9208       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9209     });
9210 
9211     // Find overlapping elements (including the offset from the base element).
9212     llvm::SmallDenseMap<
9213         const MapData *,
9214         llvm::SmallVector<
9215             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9216         4>
9217         OverlappedData;
9218     size_t Count = 0;
9219     for (const MapData &L : DeclComponentLists) {
9220       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9221       OpenMPMapClauseKind MapType;
9222       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9223       bool IsImplicit;
9224       const ValueDecl *Mapper;
9225       const Expr *VarRef;
9226       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9227           L;
9228       ++Count;
9229       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9230         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9231         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9232                  VarRef) = L1;
9233         auto CI = Components.rbegin();
9234         auto CE = Components.rend();
9235         auto SI = Components1.rbegin();
9236         auto SE = Components1.rend();
9237         for (; CI != CE && SI != SE; ++CI, ++SI) {
9238           if (CI->getAssociatedExpression()->getStmtClass() !=
9239               SI->getAssociatedExpression()->getStmtClass())
9240             break;
9241           // Are we dealing with different variables/fields?
9242           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9243             break;
9244         }
9245         // Found overlapping if, at least for one component, reached the head
9246         // of the components list.
9247         if (CI == CE || SI == SE) {
9248           // Ignore it if it is the same component.
9249           if (CI == CE && SI == SE)
9250             continue;
9251           const auto It = (SI == SE) ? CI : SI;
9252           // If one component is a pointer and another one is a kind of
9253           // dereference of this pointer (array subscript, section, dereference,
9254           // etc.), it is not an overlapping.
9255           // Same, if one component is a base and another component is a
9256           // dereferenced pointer memberexpr with the same base.
9257           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9258               (std::prev(It)->getAssociatedDeclaration() &&
9259                std::prev(It)
9260                    ->getAssociatedDeclaration()
9261                    ->getType()
9262                    ->isPointerType()) ||
9263               (It->getAssociatedDeclaration() &&
9264                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9265                std::next(It) != CE && std::next(It) != SE))
9266             continue;
9267           const MapData &BaseData = CI == CE ? L : L1;
9268           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9269               SI == SE ? Components : Components1;
9270           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9271           OverlappedElements.getSecond().push_back(SubData);
9272         }
9273       }
9274     }
9275     // Sort the overlapped elements for each item.
9276     llvm::SmallVector<const FieldDecl *, 4> Layout;
9277     if (!OverlappedData.empty()) {
9278       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9279       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9280       while (BaseType != OrigType) {
9281         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9282         OrigType = BaseType->getPointeeOrArrayElementType();
9283       }
9284 
9285       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9286         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9287       else {
9288         const auto *RD = BaseType->getAsRecordDecl();
9289         Layout.append(RD->field_begin(), RD->field_end());
9290       }
9291     }
9292     for (auto &Pair : OverlappedData) {
9293       llvm::stable_sort(
9294           Pair.getSecond(),
9295           [&Layout](
9296               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9297               OMPClauseMappableExprCommon::MappableExprComponentListRef
9298                   Second) {
9299             auto CI = First.rbegin();
9300             auto CE = First.rend();
9301             auto SI = Second.rbegin();
9302             auto SE = Second.rend();
9303             for (; CI != CE && SI != SE; ++CI, ++SI) {
9304               if (CI->getAssociatedExpression()->getStmtClass() !=
9305                   SI->getAssociatedExpression()->getStmtClass())
9306                 break;
9307               // Are we dealing with different variables/fields?
9308               if (CI->getAssociatedDeclaration() !=
9309                   SI->getAssociatedDeclaration())
9310                 break;
9311             }
9312 
9313             // Lists contain the same elements.
9314             if (CI == CE && SI == SE)
9315               return false;
9316 
9317             // List with less elements is less than list with more elements.
9318             if (CI == CE || SI == SE)
9319               return CI == CE;
9320 
9321             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9322             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9323             if (FD1->getParent() == FD2->getParent())
9324               return FD1->getFieldIndex() < FD2->getFieldIndex();
9325             const auto *It =
9326                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9327                   return FD == FD1 || FD == FD2;
9328                 });
9329             return *It == FD1;
9330           });
9331     }
9332 
9333     // Associated with a capture, because the mapping flags depend on it.
9334     // Go through all of the elements with the overlapped elements.
9335     bool IsFirstComponentList = true;
9336     for (const auto &Pair : OverlappedData) {
9337       const MapData &L = *Pair.getFirst();
9338       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9339       OpenMPMapClauseKind MapType;
9340       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9341       bool IsImplicit;
9342       const ValueDecl *Mapper;
9343       const Expr *VarRef;
9344       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9345           L;
9346       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9347           OverlappedComponents = Pair.getSecond();
9348       generateInfoForComponentList(
9349           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9350           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9351           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9352       IsFirstComponentList = false;
9353     }
9354     // Go through other elements without overlapped elements.
9355     for (const MapData &L : DeclComponentLists) {
9356       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9357       OpenMPMapClauseKind MapType;
9358       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9359       bool IsImplicit;
9360       const ValueDecl *Mapper;
9361       const Expr *VarRef;
9362       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9363           L;
9364       auto It = OverlappedData.find(&L);
9365       if (It == OverlappedData.end())
9366         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9367                                      Components, CombinedInfo, PartialStruct,
9368                                      IsFirstComponentList, IsImplicit, Mapper,
9369                                      /*ForDeviceAddr=*/false, VD, VarRef);
9370       IsFirstComponentList = false;
9371     }
9372   }
9373 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of the parallel arrays in
  /// \p CombinedInfo (Exprs, BasePointers, Pointers, Sizes, Types, Mappers).
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capture of 'this': base and begin are the pointer value itself; the
      // size is that of the pointed-to record, not of the pointer.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate entry overrides whether this map counts as implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Capture by reference.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: pass the loaded pointer
        // value, not the address of the reference.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9446 };
9447 } // anonymous namespace
9448 
9449 static void emitNonContiguousDescriptor(
9450     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9451     CGOpenMPRuntime::TargetDataInfo &Info) {
9452   CodeGenModule &CGM = CGF.CGM;
9453   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9454       &NonContigInfo = CombinedInfo.NonContigInfo;
9455 
9456   // Build an array of struct descriptor_dim and then assign it to
9457   // offload_args.
9458   //
9459   // struct descriptor_dim {
9460   //  uint64_t offset;
9461   //  uint64_t count;
9462   //  uint64_t stride
9463   // };
9464   ASTContext &C = CGF.getContext();
9465   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9466   RecordDecl *RD;
9467   RD = C.buildImplicitRecord("descriptor_dim");
9468   RD->startDefinition();
9469   addFieldToRecordDecl(C, RD, Int64Ty);
9470   addFieldToRecordDecl(C, RD, Int64Ty);
9471   addFieldToRecordDecl(C, RD, Int64Ty);
9472   RD->completeDefinition();
9473   QualType DimTy = C.getRecordType(RD);
9474 
9475   enum { OffsetFD = 0, CountFD, StrideFD };
9476   // We need two index variable here since the size of "Dims" is the same as the
9477   // size of Components, however, the size of offset, count, and stride is equal
9478   // to the size of base declaration that is non-contiguous.
9479   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9480     // Skip emitting ir if dimension size is 1 since it cannot be
9481     // non-contiguous.
9482     if (NonContigInfo.Dims[I] == 1)
9483       continue;
9484     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9485     QualType ArrayTy =
9486         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9487     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9488     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9489       unsigned RevIdx = EE - II - 1;
9490       LValue DimsLVal = CGF.MakeAddrLValue(
9491           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9492       // Offset
9493       LValue OffsetLVal = CGF.EmitLValueForField(
9494           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9495       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9496       // Count
9497       LValue CountLVal = CGF.EmitLValueForField(
9498           DimsLVal, *std::next(RD->field_begin(), CountFD));
9499       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9500       // Stride
9501       LValue StrideLVal = CGF.EmitLValueForField(
9502           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9503       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9504     }
9505     // args[I] = &dims
9506     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9507         DimsAddr, CGM.Int8PtrTy);
9508     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9509         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9510         Info.PointersArray, 0, I);
9511     Address PAddr(P, CGF.getPointerAlign());
9512     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9513     ++L;
9514   }
9515 }
9516 
9517 // Try to extract the base declaration from a `this->x` expression if possible.
9518 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9519   if (!E)
9520     return nullptr;
9521 
9522   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9523     if (const MemberExpr *ME =
9524             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9525       return ME->getMemberDecl();
9526   return nullptr;
9527 }
9528 
9529 /// Emit a string constant containing the names of the values mapped to the
9530 /// offloading runtime library.
9531 llvm::Constant *
9532 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9533                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9534 
9535   uint32_t SrcLocStrSize;
9536   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9537     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9538 
9539   SourceLocation Loc;
9540   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9541     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9542       Loc = VD->getLocation();
9543     else
9544       Loc = MapExprs.getMapExpr()->getExprLoc();
9545   } else {
9546     Loc = MapExprs.getMapDecl()->getLocation();
9547   }
9548 
9549   std::string ExprName;
9550   if (MapExprs.getMapExpr()) {
9551     PrintingPolicy P(CGF.getContext().getLangOpts());
9552     llvm::raw_string_ostream OS(ExprName);
9553     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9554     OS.flush();
9555   } else {
9556     ExprName = MapExprs.getMapDecl()->getNameAsString();
9557   }
9558 
9559   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9560   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9561                                          PLoc.getLine(), PLoc.getColumn(),
9562                                          SrcLocStrSize);
9563 }
9564 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates Info with the base-pointers, pointers, sizes, map-types,
/// map-names and mappers arrays, then fills the per-capture slots. When
/// \p IsNonContiguous is set and non-contiguous descriptors exist, the
/// pointers array is additionally overwritten with descriptor addresses.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // All pointer-typed arrays share one array-of-void* type.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // Stack temporary; filled per-entry in the loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the "size" slot carries the number of
        // dimensions instead of a byte count.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One name string per mapped expression/declaration.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-capture slots of the runtime-evaluated arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the address for use_device_ptr/use_device_addr captures.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only needed when at least one size is not a compile-time constant;
      // otherwise the sizes live in the constant global created above.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9739 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  // When true, emit the map-type array meant for the end of the region
  // (MapTypesArrayEnd) instead of the one for the beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  // Intentionally non-explicit: callers pass the flag via list-initialization.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9748 
9749 /// Emit the arguments to be passed to the runtime library based on the
9750 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9751 /// ForEndCall, emit map types to be passed for the end of the region instead of
9752 /// the beginning.
9753 static void emitOffloadingArraysArgument(
9754     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9755     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9756     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9757     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9758     const ArgumentsOptions &Options = ArgumentsOptions()) {
9759   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9760          "expected region end call to runtime only when end call is separate");
9761   CodeGenModule &CGM = CGF.CGM;
9762   if (Info.NumberOfPtrs) {
9763     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9764         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9765         Info.BasePointersArray,
9766         /*Idx0=*/0, /*Idx1=*/0);
9767     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9768         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9769         Info.PointersArray,
9770         /*Idx0=*/0,
9771         /*Idx1=*/0);
9772     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9773         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9774         /*Idx0=*/0, /*Idx1=*/0);
9775     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9776         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9777         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9778                                                     : Info.MapTypesArray,
9779         /*Idx0=*/0,
9780         /*Idx1=*/0);
9781 
9782     // Only emit the mapper information arrays if debug information is
9783     // requested.
9784     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9785       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9786     else
9787       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9788           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9789           Info.MapNamesArray,
9790           /*Idx0=*/0,
9791           /*Idx1=*/0);
9792     // If there is no user-defined mapper, set the mapper array to nullptr to
9793     // avoid an unnecessary data privatization
9794     if (!Info.HasMapper)
9795       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9796     else
9797       MappersArrayArg =
9798           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9799   } else {
9800     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9801     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9802     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9803     MapTypesArrayArg =
9804         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9805     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9806     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9807   }
9808 }
9809 
/// Check for inner distribute directive.
///
/// Looks through the innermost captured statement of \p D for a nested
/// OpenMP directive and returns it if it is a 'distribute' directive
/// (possibly one level deeper inside a nested 'teams' region for plain
/// 'target'); returns nullptr otherwise.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // NOTE(review): qualified via CGOpenMPSIMDRuntime; presumably resolves to
  // the inherited CGOpenMPRuntime static helper — confirm.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may wrap 'distribute' directly or via a nested 'teams'.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Descend one more level: 'target' -> 'teams' -> 'distribute'.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // Combined target forms that cannot contain a nested 'distribute'.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining directive kinds are not valid values of
    // D.getDirectiveKind() for this query.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9919 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D The 'declare mapper' declaration to emit a function for.
/// \param CGF Optional current function; when non-null, \p D is additionally
///        recorded in FunctionUDMMap under \p CGF's LLVM function so the
///        mapper is associated with that function.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function for this declaration has already been emitted.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared by the mapper directive; it is privatized below so
  // that, inside the loop, it refers to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function after the mangled mapped type and the mapper's name so
  // distinct mappers get distinct symbols.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Drop 'optnone' so the mapper body can be optimized even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block that branches into the loop so the PHI below can take
  // PtrBegin from it.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // Tracks the last block of the loop body; updated after the map-type
  // selection CFG emitted inside the loop below.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count into the position of the MEMBER_OF bit-field so
  // it can be added onto each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Select the combined map type according to which predecessor reached
    // EndBB; the ToElseBB edge is the tofrom case (type unchanged).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the generated function so later requests reuse it.
  UDMMap.try_emplace(D, Fn);
  // Record this mapper declaration against the current function, if any.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10200 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// \param MapperCGF Function emission context of the mapper body.
/// \param Handle Runtime mapper handle forwarded to
///        __tgt_push_mapper_component.
/// \param Base Base pointer argument of the mapper function.
/// \param Begin Begin pointer argument of the mapper function.
/// \param Size Number of array elements (already divided by element size).
/// \param MapType Map type argument of the mapper function.
/// \param MapName Map name argument (may be a null pointer constant).
/// \param ElementSize Size in chars of one array element.
/// \param ExitBB Block to branch to when no work is needed, and fallthrough
///        target after the runtime call.
/// \param IsInit True to emit the initialization path, false for deletion.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Init runs when (size > 1 || (base != begin && IsPtrAndObj)) and the
    // delete bit is clear.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion runs when size > 1 and the delete bit is set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10268 
10269 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10270     const OMPDeclareMapperDecl *D) {
10271   auto I = UDMMap.find(D);
10272   if (I != UDMMap.end())
10273     return I->second;
10274   emitUserDefinedMapper(D);
10275   return UDMMap.lookup(D);
10276 }
10277 
10278 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10279     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10280     llvm::Value *DeviceID,
10281     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10282                                      const OMPLoopDirective &D)>
10283         SizeEmitter) {
10284   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10285   const OMPExecutableDirective *TD = &D;
10286   // Get nested teams distribute kind directive, if any.
10287   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10288     TD = getNestedDistributeDirective(CGM.getContext(), D);
10289   if (!TD)
10290     return;
10291   const auto *LD = cast<OMPLoopDirective>(TD);
10292   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10293                                                          PrePostActionTy &) {
10294     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10295       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10296       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10297       CGF.EmitRuntimeCall(
10298           OMPBuilder.getOrCreateRuntimeFunction(
10299               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10300           Args);
10301     }
10302   };
10303   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10304 }
10305 
10306 void CGOpenMPRuntime::emitTargetCall(
10307     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10308     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10309     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10310     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10311                                      const OMPLoopDirective &D)>
10312         SizeEmitter) {
10313   if (!CGF.HaveInsertPoint())
10314     return;
10315 
10316   assert(OutlinedFn && "Invalid outlined function!");
10317 
10318   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10319                                  D.hasClausesOfKind<OMPNowaitClause>();
10320   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10321   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10322   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10323                                             PrePostActionTy &) {
10324     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10325   };
10326   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10327 
10328   CodeGenFunction::OMPTargetDataInfo InputInfo;
10329   llvm::Value *MapTypesArray = nullptr;
10330   llvm::Value *MapNamesArray = nullptr;
10331   // Fill up the pointer arrays and transfer execution to the device.
10332   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10333                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10334                     &CapturedVars,
10335                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10336     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10337       // Reverse offloading is not supported, so just execute on the host.
10338       if (RequiresOuterTask) {
10339         CapturedVars.clear();
10340         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10341       }
10342       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10343       return;
10344     }
10345 
10346     // On top of the arrays that were filled up, the target offloading call
10347     // takes as arguments the device id as well as the host pointer. The host
10348     // pointer is used by the runtime library to identify the current target
10349     // region, so it only has to be unique and not necessarily point to
10350     // anything. It could be the pointer to the outlined function that
10351     // implements the target region, but we aren't using that so that the
10352     // compiler doesn't need to keep that, and could therefore inline the host
10353     // function if proven worthwhile during optimization.
10354 
10355     // From this point on, we need to have an ID of the target region defined.
10356     assert(OutlinedFnID && "Invalid outlined function ID!");
10357 
10358     // Emit device ID if any.
10359     llvm::Value *DeviceID;
10360     if (Device.getPointer()) {
10361       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10362               Device.getInt() == OMPC_DEVICE_device_num) &&
10363              "Expected device_num modifier.");
10364       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10365       DeviceID =
10366           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10367     } else {
10368       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10369     }
10370 
10371     // Emit the number of elements in the offloading arrays.
10372     llvm::Value *PointerNum =
10373         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10374 
10375     // Return value of the runtime offloading call.
10376     llvm::Value *Return;
10377 
10378     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10379     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10380 
10381     // Source location for the ident struct
10382     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10383 
10384     // Emit tripcount for the target loop-based directive.
10385     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10386 
10387     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10388     // The target region is an outlined function launched by the runtime
10389     // via calls __tgt_target() or __tgt_target_teams().
10390     //
10391     // __tgt_target() launches a target region with one team and one thread,
10392     // executing a serial region.  This master thread may in turn launch
10393     // more threads within its team upon encountering a parallel region,
10394     // however, no additional teams can be launched on the device.
10395     //
10396     // __tgt_target_teams() launches a target region with one or more teams,
10397     // each with one or more threads.  This call is required for target
10398     // constructs such as:
10399     //  'target teams'
10400     //  'target' / 'teams'
10401     //  'target teams distribute parallel for'
10402     //  'target parallel'
10403     // and so on.
10404     //
10405     // Note that on the host and CPU targets, the runtime implementation of
10406     // these calls simply call the outlined function without forking threads.
10407     // The outlined functions themselves have runtime calls to
10408     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10409     // the compiler in emitTeamsCall() and emitParallelCall().
10410     //
10411     // In contrast, on the NVPTX target, the implementation of
10412     // __tgt_target_teams() launches a GPU kernel with the requested number
10413     // of teams and threads so no additional calls to the runtime are required.
10414     if (NumTeams) {
10415       // If we have NumTeams defined this means that we have an enclosed teams
10416       // region. Therefore we also expect to have NumThreads defined. These two
10417       // values should be defined in the presence of a teams directive,
10418       // regardless of having any clauses associated. If the user is using teams
10419       // but no clauses, these two values will be the default that should be
10420       // passed to the runtime library - a 32-bit integer with the value zero.
10421       assert(NumThreads && "Thread limit expression should be available along "
10422                            "with number of teams.");
10423       SmallVector<llvm::Value *> OffloadingArgs = {
10424           RTLoc,
10425           DeviceID,
10426           OutlinedFnID,
10427           PointerNum,
10428           InputInfo.BasePointersArray.getPointer(),
10429           InputInfo.PointersArray.getPointer(),
10430           InputInfo.SizesArray.getPointer(),
10431           MapTypesArray,
10432           MapNamesArray,
10433           InputInfo.MappersArray.getPointer(),
10434           NumTeams,
10435           NumThreads};
10436       if (HasNowait) {
10437         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10438         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10439         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10440         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10441         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10442         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10443       }
10444       Return = CGF.EmitRuntimeCall(
10445           OMPBuilder.getOrCreateRuntimeFunction(
10446               CGM.getModule(), HasNowait
10447                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10448                                    : OMPRTL___tgt_target_teams_mapper),
10449           OffloadingArgs);
10450     } else {
10451       SmallVector<llvm::Value *> OffloadingArgs = {
10452           RTLoc,
10453           DeviceID,
10454           OutlinedFnID,
10455           PointerNum,
10456           InputInfo.BasePointersArray.getPointer(),
10457           InputInfo.PointersArray.getPointer(),
10458           InputInfo.SizesArray.getPointer(),
10459           MapTypesArray,
10460           MapNamesArray,
10461           InputInfo.MappersArray.getPointer()};
10462       if (HasNowait) {
10463         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10464         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10465         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10466         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10467         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10468         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10469       }
10470       Return = CGF.EmitRuntimeCall(
10471           OMPBuilder.getOrCreateRuntimeFunction(
10472               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10473                                          : OMPRTL___tgt_target_mapper),
10474           OffloadingArgs);
10475     }
10476 
10477     // Check the error code and execute the host version if required.
10478     llvm::BasicBlock *OffloadFailedBlock =
10479         CGF.createBasicBlock("omp_offload.failed");
10480     llvm::BasicBlock *OffloadContBlock =
10481         CGF.createBasicBlock("omp_offload.cont");
10482     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10483     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10484 
10485     CGF.EmitBlock(OffloadFailedBlock);
10486     if (RequiresOuterTask) {
10487       CapturedVars.clear();
10488       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10489     }
10490     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10491     CGF.EmitBranch(OffloadContBlock);
10492 
10493     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10494   };
10495 
10496   // Notify that the host version must be executed.
10497   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10498                     RequiresOuterTask](CodeGenFunction &CGF,
10499                                        PrePostActionTy &) {
10500     if (RequiresOuterTask) {
10501       CapturedVars.clear();
10502       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10503     }
10504     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10505   };
10506 
10507   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10508                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10509                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10510     // Fill up the arrays with all the captured variables.
10511     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10512 
10513     // Get mappable expression information.
10514     MappableExprsHandler MEHandler(D, CGF);
10515     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10516     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10517 
10518     auto RI = CS.getCapturedRecordDecl()->field_begin();
10519     auto *CV = CapturedVars.begin();
10520     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10521                                               CE = CS.capture_end();
10522          CI != CE; ++CI, ++RI, ++CV) {
10523       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10524       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10525 
10526       // VLA sizes are passed to the outlined region by copy and do not have map
10527       // information associated.
10528       if (CI->capturesVariableArrayType()) {
10529         CurInfo.Exprs.push_back(nullptr);
10530         CurInfo.BasePointers.push_back(*CV);
10531         CurInfo.Pointers.push_back(*CV);
10532         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10533             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10534         // Copy to the device as an argument. No need to retrieve it.
10535         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10536                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10537                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10538         CurInfo.Mappers.push_back(nullptr);
10539       } else {
10540         // If we have any information in the map clause, we use it, otherwise we
10541         // just do a default mapping.
10542         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10543         if (!CI->capturesThis())
10544           MappedVarSet.insert(CI->getCapturedVar());
10545         else
10546           MappedVarSet.insert(nullptr);
10547         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10548           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10549         // Generate correct mapping for variables captured by reference in
10550         // lambdas.
10551         if (CI->capturesVariable())
10552           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10553                                                   CurInfo, LambdaPointers);
10554       }
10555       // We expect to have at least an element of information for this capture.
10556       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10557              "Non-existing map pointer for capture!");
10558       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10559              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10560              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10561              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10562              "Inconsistent map information sizes!");
10563 
10564       // If there is an entry in PartialStruct it means we have a struct with
10565       // individual members mapped. Emit an extra combined entry.
10566       if (PartialStruct.Base.isValid()) {
10567         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10568         MEHandler.emitCombinedEntry(
10569             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10570             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10571       }
10572 
10573       // We need to append the results of this capture to what we already have.
10574       CombinedInfo.append(CurInfo);
10575     }
10576     // Adjust MEMBER_OF flags for the lambdas captures.
10577     MEHandler.adjustMemberOfForLambdaCaptures(
10578         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10579         CombinedInfo.Types);
10580     // Map any list items in a map clause that were not captures because they
10581     // weren't referenced within the construct.
10582     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10583 
10584     TargetDataInfo Info;
10585     // Fill up the arrays and create the arguments.
10586     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10587     emitOffloadingArraysArgument(
10588         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10589         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10590         {/*ForEndCall=*/false});
10591 
10592     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10593     InputInfo.BasePointersArray =
10594         Address(Info.BasePointersArray, CGM.getPointerAlign());
10595     InputInfo.PointersArray =
10596         Address(Info.PointersArray, CGM.getPointerAlign());
10597     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10598     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10599     MapTypesArray = Info.MapTypesArray;
10600     MapNamesArray = Info.MapNamesArray;
10601     if (RequiresOuterTask)
10602       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10603     else
10604       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10605   };
10606 
10607   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10608                              CodeGenFunction &CGF, PrePostActionTy &) {
10609     if (RequiresOuterTask) {
10610       CodeGenFunction::OMPTargetDataInfo InputInfo;
10611       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10612     } else {
10613       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10614     }
10615   };
10616 
10617   // If we have a target function ID it means that we need to support
10618   // offloading, otherwise, just execute on the host. We need to execute on host
10619   // regardless of the conditional in the if clause if, e.g., the user do not
10620   // specify target triples.
10621   if (OutlinedFnID) {
10622     if (IfCond) {
10623       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10624     } else {
10625       RegionCodeGenTy ThenRCG(TargetThenGen);
10626       ThenRCG(CGF);
10627     }
10628   } else {
10629     RegionCodeGenTy ElseRCG(TargetElseGen);
10630     ElseRCG(CGF);
10631   }
10632 }
10633 
/// Recursively scan \p S for OpenMP target directives and emit the device
/// function (kernel) for each one found. \p ParentName is the mangled name of
/// the enclosing function/constructor/destructor and participates in the
/// unique naming of the generated kernels.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // The (device-id, file-id, parent name, line) tuple identifies this target
    // region consistently between the host and device compilations.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    // Every target execution directive kind must have a case here; all other
    // directive kinds are unreachable because of the
    // isOpenMPTargetExecutionDirective() check above.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Non-target directives may still have target regions nested inside their
    // associated statement.
    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10784 
10785 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10786   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10787       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10788   if (!DevTy)
10789     return false;
10790   // Do not emit device_type(nohost) functions for the host.
10791   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10792     return true;
10793   // Do not emit device_type(host) functions for the device.
10794   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10795     return true;
10796   return false;
10797 }
10798 
10799 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10800   // If emitting code for the host, we do not process FD here. Instead we do
10801   // the normal code generation.
10802   if (!CGM.getLangOpts().OpenMPIsDevice) {
10803     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10804       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10805                                   CGM.getLangOpts().OpenMPIsDevice))
10806         return true;
10807     return false;
10808   }
10809 
10810   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10811   // Try to detect target regions in the function.
10812   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10813     StringRef Name = CGM.getMangledName(GD);
10814     scanForTargetRegionsFunctions(FD->getBody(), Name);
10815     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10816                                 CGM.getLangOpts().OpenMPIsDevice))
10817       return true;
10818   }
10819 
10820   // Do not to emit function if it is not marked as declare target.
10821   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10822          AlreadyEmittedTargetDecls.count(VD) == 0;
10823 }
10824 
10825 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10826   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10827                               CGM.getLangOpts().OpenMPIsDevice))
10828     return true;
10829 
10830   if (!CGM.getLangOpts().OpenMPIsDevice)
10831     return false;
10832 
10833   // Check if there are Ctors/Dtors in this declaration and look for target
10834   // regions in it. We use the complete variant to produce the kernel name
10835   // mangling.
10836   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10837   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10838     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10839       StringRef ParentName =
10840           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10841       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10842     }
10843     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10844       StringRef ParentName =
10845           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10846       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10847     }
10848   }
10849 
10850   // Do not to emit variable if it is not marked as declare target.
10851   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10852       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10853           cast<VarDecl>(GD.getDecl()));
10854   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10855       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10856        HasRequiresUnifiedSharedMemory)) {
10857     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10858     return true;
10859   }
10860   return false;
10861 }
10862 
/// Register the declare target variable \p VD (emitted at address \p Addr)
/// with the offload entries manager so host and device copies can be linked
/// by the runtime.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when compiling for the host without any device
  // triples.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // declare target 'to' without unified memory: register the variable
    // itself, with its real size and linkage.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only entries are registered with size zero.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant "<name>_ref" pointing at the variable and
        // mark it compiler-used so it survives optimization.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    // 'link' (or 'to' with unified memory) variables are registered through a
    // pointer-sized indirection variable rather than the variable itself.
    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10944 
10945 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10946   if (isa<FunctionDecl>(GD.getDecl()) ||
10947       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10948     return emitTargetFunctions(GD);
10949 
10950   return emitTargetGlobalVariable(GD);
10951 }
10952 
10953 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10954   for (const VarDecl *VD : DeferredGlobalVariables) {
10955     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10956         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10957     if (!Res)
10958       continue;
10959     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10960         !HasRequiresUnifiedSharedMemory) {
10961       CGM.EmitGlobal(VD);
10962     } else {
10963       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10964               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10965                HasRequiresUnifiedSharedMemory)) &&
10966              "Expected link clause or to clause with unified memory.");
10967       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10968     }
10969   }
10970 }
10971 
10972 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10973     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10974   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10975          " Expected target-based directive.");
10976 }
10977 
10978 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10979   for (const OMPClause *Clause : D->clauselists()) {
10980     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10981       HasRequiresUnifiedSharedMemory = true;
10982     } else if (const auto *AC =
10983                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10984       switch (AC->getAtomicDefaultMemOrderKind()) {
10985       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10986         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10987         break;
10988       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10989         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10990         break;
10991       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10992         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10993         break;
10994       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10995         break;
10996       }
10997     }
10998   }
10999 }
11000 
/// Returns the default atomic ordering, as recorded by
/// processRequiresDirective() from a 'requires atomic_default_mem_order'
/// clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11004 
11005 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11006                                                        LangAS &AS) {
11007   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11008     return false;
11009   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11010   switch(A->getAllocatorType()) {
11011   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11012   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11013   // Not supported, fallback to the default mem space.
11014   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11015   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11016   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11017   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11018   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11019   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11020   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11021     AS = LangAS::Default;
11022     return true;
11023   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11024     llvm_unreachable("Expected predefined allocator for the variables with the "
11025                      "static storage.");
11026   }
11027   return false;
11028 }
11029 
/// Returns true if a 'requires unified_shared_memory' clause was seen (see
/// processRequiresDirective()).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11033 
11034 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11035     CodeGenModule &CGM)
11036     : CGM(CGM) {
11037   if (CGM.getLangOpts().OpenMPIsDevice) {
11038     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11039     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11040   }
11041 }
11042 
11043 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11044   if (CGM.getLangOpts().OpenMPIsDevice)
11045     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11046 }
11047 
11048 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11049   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11050     return true;
11051 
11052   const auto *D = cast<FunctionDecl>(GD.getDecl());
11053   // Do not to emit function if it is marked as declare target as it was already
11054   // emitted.
11055   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11056     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11057       if (auto *F = dyn_cast_or_null<llvm::Function>(
11058               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11059         return !F->isDeclaration();
11060       return false;
11061     }
11062     return true;
11063   }
11064 
11065   return !AlreadyEmittedTargetDecls.insert(D).second;
11066 }
11067 
/// Create the host-side constructor-like function that registers the
/// 'requires' clauses of this compilation unit with the offloading runtime
/// (via __tgt_register_requires). Returns nullptr when no registration is
/// needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    // The registration function takes no arguments and returns void.
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the call __tgt_register_requires(flags) as the function body.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11109 
11110 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11111                                     const OMPExecutableDirective &D,
11112                                     SourceLocation Loc,
11113                                     llvm::Function *OutlinedFn,
11114                                     ArrayRef<llvm::Value *> CapturedVars) {
11115   if (!CGF.HaveInsertPoint())
11116     return;
11117 
11118   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11119   CodeGenFunction::RunCleanupsScope Scope(CGF);
11120 
11121   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11122   llvm::Value *Args[] = {
11123       RTLoc,
11124       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11125       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11126   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11127   RealArgs.append(std::begin(Args), std::end(Args));
11128   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11129 
11130   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11131       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11132   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11133 }
11134 
11135 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11136                                          const Expr *NumTeams,
11137                                          const Expr *ThreadLimit,
11138                                          SourceLocation Loc) {
11139   if (!CGF.HaveInsertPoint())
11140     return;
11141 
11142   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11143 
11144   llvm::Value *NumTeamsVal =
11145       NumTeams
11146           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11147                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11148           : CGF.Builder.getInt32(0);
11149 
11150   llvm::Value *ThreadLimitVal =
11151       ThreadLimit
11152           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11153                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11154           : CGF.Builder.getInt32(0);
11155 
11156   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11157   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11158                                      ThreadLimitVal};
11159   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11160                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11161                       PushNumTeamsArgs);
11162 }
11163 
11164 void CGOpenMPRuntime::emitTargetDataCalls(
11165     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11166     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11167   if (!CGF.HaveInsertPoint())
11168     return;
11169 
11170   // Action used to replace the default codegen action and turn privatization
11171   // off.
11172   PrePostActionTy NoPrivAction;
11173 
11174   // Generate the code for the opening of the data environment. Capture all the
11175   // arguments of the runtime call by reference because they are used in the
11176   // closing of the region.
11177   auto &&BeginThenGen = [this, &D, Device, &Info,
11178                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11179     // Fill up the arrays with all the mapped variables.
11180     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11181 
11182     // Get map clause information.
11183     MappableExprsHandler MEHandler(D, CGF);
11184     MEHandler.generateAllInfo(CombinedInfo);
11185 
11186     // Fill up the arrays and create the arguments.
11187     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11188                          /*IsNonContiguous=*/true);
11189 
11190     llvm::Value *BasePointersArrayArg = nullptr;
11191     llvm::Value *PointersArrayArg = nullptr;
11192     llvm::Value *SizesArrayArg = nullptr;
11193     llvm::Value *MapTypesArrayArg = nullptr;
11194     llvm::Value *MapNamesArrayArg = nullptr;
11195     llvm::Value *MappersArrayArg = nullptr;
11196     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11197                                  SizesArrayArg, MapTypesArrayArg,
11198                                  MapNamesArrayArg, MappersArrayArg, Info);
11199 
11200     // Emit device ID if any.
11201     llvm::Value *DeviceID = nullptr;
11202     if (Device) {
11203       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11204                                            CGF.Int64Ty, /*isSigned=*/true);
11205     } else {
11206       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11207     }
11208 
11209     // Emit the number of elements in the offloading arrays.
11210     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11211     //
11212     // Source location for the ident struct
11213     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11214 
11215     llvm::Value *OffloadingArgs[] = {RTLoc,
11216                                      DeviceID,
11217                                      PointerNum,
11218                                      BasePointersArrayArg,
11219                                      PointersArrayArg,
11220                                      SizesArrayArg,
11221                                      MapTypesArrayArg,
11222                                      MapNamesArrayArg,
11223                                      MappersArrayArg};
11224     CGF.EmitRuntimeCall(
11225         OMPBuilder.getOrCreateRuntimeFunction(
11226             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11227         OffloadingArgs);
11228 
11229     // If device pointer privatization is required, emit the body of the region
11230     // here. It will have to be duplicated: with and without privatization.
11231     if (!Info.CaptureDeviceAddrMap.empty())
11232       CodeGen(CGF);
11233   };
11234 
11235   // Generate code for the closing of the data region.
11236   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11237                                                 PrePostActionTy &) {
11238     assert(Info.isValid() && "Invalid data environment closing arguments.");
11239 
11240     llvm::Value *BasePointersArrayArg = nullptr;
11241     llvm::Value *PointersArrayArg = nullptr;
11242     llvm::Value *SizesArrayArg = nullptr;
11243     llvm::Value *MapTypesArrayArg = nullptr;
11244     llvm::Value *MapNamesArrayArg = nullptr;
11245     llvm::Value *MappersArrayArg = nullptr;
11246     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11247                                  SizesArrayArg, MapTypesArrayArg,
11248                                  MapNamesArrayArg, MappersArrayArg, Info,
11249                                  {/*ForEndCall=*/true});
11250 
11251     // Emit device ID if any.
11252     llvm::Value *DeviceID = nullptr;
11253     if (Device) {
11254       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11255                                            CGF.Int64Ty, /*isSigned=*/true);
11256     } else {
11257       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11258     }
11259 
11260     // Emit the number of elements in the offloading arrays.
11261     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11262 
11263     // Source location for the ident struct
11264     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11265 
11266     llvm::Value *OffloadingArgs[] = {RTLoc,
11267                                      DeviceID,
11268                                      PointerNum,
11269                                      BasePointersArrayArg,
11270                                      PointersArrayArg,
11271                                      SizesArrayArg,
11272                                      MapTypesArrayArg,
11273                                      MapNamesArrayArg,
11274                                      MappersArrayArg};
11275     CGF.EmitRuntimeCall(
11276         OMPBuilder.getOrCreateRuntimeFunction(
11277             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11278         OffloadingArgs);
11279   };
11280 
11281   // If we need device pointer privatization, we need to emit the body of the
11282   // region with no privatization in the 'else' branch of the conditional.
11283   // Otherwise, we don't have to do anything.
11284   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11285                                                          PrePostActionTy &) {
11286     if (!Info.CaptureDeviceAddrMap.empty()) {
11287       CodeGen.setAction(NoPrivAction);
11288       CodeGen(CGF);
11289     }
11290   };
11291 
11292   // We don't have to do anything to close the region if the if clause evaluates
11293   // to false.
11294   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11295 
11296   if (IfCond) {
11297     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11298   } else {
11299     RegionCodeGenTy RCG(BeginThenGen);
11300     RCG(CGF);
11301   }
11302 
11303   // If we don't require privatization of device pointers, we emit the body in
11304   // between the runtime calls. This avoids duplicating the body code.
11305   if (Info.CaptureDeviceAddrMap.empty()) {
11306     CodeGen.setAction(NoPrivAction);
11307     CodeGen(CGF);
11308   }
11309 
11310   if (IfCond) {
11311     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11312   } else {
11313     RegionCodeGenTy RCG(EndThenGen);
11314     RCG(CGF);
11315   }
11316 }
11317 
/// Emit the runtime call implementing a standalone target data directive:
/// 'target enter data', 'target exit data' or 'target update'. The mapped
/// variables are gathered into offloading arrays, and a single
/// __tgt_target_data_{begin,end,update}[_nowait]_mapper runtime entry point
/// is invoked, guarded by the 'if' clause condition when one is present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and the two arrays are filled by TargetThenGen (below) before
  // ThenGen runs; they are captured by reference for that reason.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Arguments in the order expected by the __tgt_target_data_* mapper
    // entry points.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is ruled out by the assertion at the top of
    // this function; listing them explicitly keeps the switch exhaustive so
    // that new directive kinds trigger a compiler warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays for the directive's map clauses and then
  // emits ThenGen, either inlined or wrapped in an outer task when 'depend'
  // or 'nowait' clauses require one.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the arrays to the captures ThenGen reads.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the runtime call is emitted only on the 'then' path;
  // the 'else' path does nothing.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11497 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector (maps-to-vector).
    ParamKindTy Kind = Vector;
    /// For Linear: the step value; for LinearWithVarStride: the position of
    /// the parameter that holds the stride. Unused for other kinds.
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause; emitted into the mangled vector
    /// name only when it evaluates to a non-zero value.
    llvm::APSInt Alignment;
  };
} // namespace
11508 
11509 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11510                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11511   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11512   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11513   // of that clause. The VLEN value must be power of 2.
11514   // In other case the notion of the function`s "characteristic data type" (CDT)
11515   // is used to compute the vector length.
11516   // CDT is defined in the following order:
11517   //   a) For non-void function, the CDT is the return type.
11518   //   b) If the function has any non-uniform, non-linear parameters, then the
11519   //   CDT is the type of the first such parameter.
11520   //   c) If the CDT determined by a) or b) above is struct, union, or class
11521   //   type which is pass-by-value (except for the type that maps to the
11522   //   built-in complex data type), the characteristic data type is int.
11523   //   d) If none of the above three cases is applicable, the CDT is int.
11524   // The VLEN is then determined based on the CDT and the size of vector
11525   // register of that ISA for which current vector version is generated. The
11526   // VLEN is computed using the formula below:
11527   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11528   // where vector register size specified in section 3.2.1 Registers and the
11529   // Stack Frame of original AMD64 ABI document.
11530   QualType RetType = FD->getReturnType();
11531   if (RetType.isNull())
11532     return 0;
11533   ASTContext &C = FD->getASTContext();
11534   QualType CDT;
11535   if (!RetType.isNull() && !RetType->isVoidType()) {
11536     CDT = RetType;
11537   } else {
11538     unsigned Offset = 0;
11539     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11540       if (ParamAttrs[Offset].Kind == Vector)
11541         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11542       ++Offset;
11543     }
11544     if (CDT.isNull()) {
11545       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11546         if (ParamAttrs[I + Offset].Kind == Vector) {
11547           CDT = FD->getParamDecl(I)->getType();
11548           break;
11549         }
11550       }
11551     }
11552   }
11553   if (CDT.isNull())
11554     CDT = C.IntTy;
11555   CDT = CDT->getCanonicalTypeUnqualified();
11556   if (CDT->isRecordType() || CDT->isUnionType())
11557     CDT = C.IntTy;
11558   return C.getTypeSize(CDT);
11559 }
11560 
11561 static void
11562 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11563                            const llvm::APSInt &VLENVal,
11564                            ArrayRef<ParamAttrTy> ParamAttrs,
11565                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11566   struct ISADataTy {
11567     char ISA;
11568     unsigned VecRegSize;
11569   };
11570   ISADataTy ISAData[] = {
11571       {
11572           'b', 128
11573       }, // SSE
11574       {
11575           'c', 256
11576       }, // AVX
11577       {
11578           'd', 256
11579       }, // AVX2
11580       {
11581           'e', 512
11582       }, // AVX512
11583   };
11584   llvm::SmallVector<char, 2> Masked;
11585   switch (State) {
11586   case OMPDeclareSimdDeclAttr::BS_Undefined:
11587     Masked.push_back('N');
11588     Masked.push_back('M');
11589     break;
11590   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11591     Masked.push_back('N');
11592     break;
11593   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11594     Masked.push_back('M');
11595     break;
11596   }
11597   for (char Mask : Masked) {
11598     for (const ISADataTy &Data : ISAData) {
11599       SmallString<256> Buffer;
11600       llvm::raw_svector_ostream Out(Buffer);
11601       Out << "_ZGV" << Data.ISA << Mask;
11602       if (!VLENVal) {
11603         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11604         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11605         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11606       } else {
11607         Out << VLENVal;
11608       }
11609       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11610         switch (ParamAttr.Kind){
11611         case LinearWithVarStride:
11612           Out << 's' << ParamAttr.StrideOrArg;
11613           break;
11614         case Linear:
11615           Out << 'l';
11616           if (ParamAttr.StrideOrArg != 1)
11617             Out << ParamAttr.StrideOrArg;
11618           break;
11619         case Uniform:
11620           Out << 'u';
11621           break;
11622         case Vector:
11623           Out << 'v';
11624           break;
11625         }
11626         if (!!ParamAttr.Alignment)
11627           Out << 'a' << ParamAttr.Alignment;
11628       }
11629       Out << '_' << Fn->getName();
11630       Fn->addFnAttr(Out.str());
11631     }
11632   }
11633 }
11634 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11640 
11641 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11642 ///
11643 /// TODO: Need to implement the behavior for reference marked with a
11644 /// var or no linear modifiers (1.b in the section). For this, we
11645 /// need to extend ParamKindTy to support the linear modifiers.
11646 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11647   QT = QT.getCanonicalType();
11648 
11649   if (QT->isVoidType())
11650     return false;
11651 
11652   if (Kind == ParamKindTy::Uniform)
11653     return false;
11654 
11655   if (Kind == ParamKindTy::Linear)
11656     return false;
11657 
11658   // TODO: Handle linear references with modifiers
11659 
11660   if (Kind == ParamKindTy::LinearWithVarStride)
11661     return false;
11662 
11663   return true;
11664 }
11665 
11666 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11667 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11668   QT = QT.getCanonicalType();
11669   unsigned Size = C.getTypeSize(QT);
11670 
11671   // Only scalars and complex within 16 bytes wide set PVB to true.
11672   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11673     return false;
11674 
11675   if (QT->isFloatingType())
11676     return true;
11677 
11678   if (QT->isIntegerType())
11679     return true;
11680 
11681   if (QT->isPointerType())
11682     return true;
11683 
11684   // TODO: Add support for complex types (section 3.1.2, item 2).
11685 
11686   return false;
11687 }
11688 
11689 /// Computes the lane size (LS) of a return type or of an input parameter,
11690 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11691 /// TODO: Add support for references, section 3.2.1, item 1.
11692 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11693   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11694     QualType PTy = QT.getCanonicalType()->getPointeeType();
11695     if (getAArch64PBV(PTy, C))
11696       return C.getTypeSize(PTy);
11697   }
11698   if (getAArch64PBV(QT, C))
11699     return C.getTypeSize(QT);
11700 
11701   return C.getTypeSize(C.getUIntPtrType());
11702 }
11703 
11704 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11705 // signature of the scalar function, as defined in 3.2.2 of the
11706 // AAVFABI.
11707 static std::tuple<unsigned, unsigned, bool>
11708 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11709   QualType RetType = FD->getReturnType().getCanonicalType();
11710 
11711   ASTContext &C = FD->getASTContext();
11712 
11713   bool OutputBecomesInput = false;
11714 
11715   llvm::SmallVector<unsigned, 8> Sizes;
11716   if (!RetType->isVoidType()) {
11717     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11718     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11719       OutputBecomesInput = true;
11720   }
11721   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11722     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11723     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11724   }
11725 
11726   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11727   // The LS of a function parameter / return value can only be a power
11728   // of 2, starting from 8 bits, up to 128.
11729   assert(llvm::all_of(Sizes,
11730                       [](unsigned Size) {
11731                         return Size == 8 || Size == 16 || Size == 32 ||
11732                                Size == 64 || Size == 128;
11733                       }) &&
11734          "Invalid size");
11735 
11736   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11737                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11738                          OutputBecomesInput);
11739 }
11740 
11741 /// Mangle the parameter part of the vector function name according to
11742 /// their OpenMP classification. The mangling function is defined in
11743 /// section 3.5 of the AAVFABI.
11744 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11745   SmallString<256> Buffer;
11746   llvm::raw_svector_ostream Out(Buffer);
11747   for (const auto &ParamAttr : ParamAttrs) {
11748     switch (ParamAttr.Kind) {
11749     case LinearWithVarStride:
11750       Out << "ls" << ParamAttr.StrideOrArg;
11751       break;
11752     case Linear:
11753       Out << 'l';
11754       // Don't print the step value if it is not present or if it is
11755       // equal to 1.
11756       if (ParamAttr.StrideOrArg != 1)
11757         Out << ParamAttr.StrideOrArg;
11758       break;
11759     case Uniform:
11760       Out << 'u';
11761       break;
11762     case Vector:
11763       Out << 'v';
11764       break;
11765     }
11766 
11767     if (!!ParamAttr.Alignment)
11768       Out << 'a' << ParamAttr.Alignment;
11769   }
11770 
11771   return std::string(Out.str());
11772 }
11773 
11774 // Function used to add the attribute. The parameter `VLEN` is
11775 // templated to allow the use of "x" when targeting scalable functions
11776 // for SVE.
11777 template <typename T>
11778 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11779                                  char ISA, StringRef ParSeq,
11780                                  StringRef MangledName, bool OutputBecomesInput,
11781                                  llvm::Function *Fn) {
11782   SmallString<256> Buffer;
11783   llvm::raw_svector_ostream Out(Buffer);
11784   Out << Prefix << ISA << LMask << VLEN;
11785   if (OutputBecomesInput)
11786     Out << "v";
11787   Out << ParSeq << "_" << MangledName;
11788   Fn->addFnAttr(Out.str());
11789 }
11790 
11791 // Helper function to generate the Advanced SIMD names depending on
11792 // the value of the NDS when simdlen is not present.
11793 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11794                                       StringRef Prefix, char ISA,
11795                                       StringRef ParSeq, StringRef MangledName,
11796                                       bool OutputBecomesInput,
11797                                       llvm::Function *Fn) {
11798   switch (NDS) {
11799   case 8:
11800     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11801                          OutputBecomesInput, Fn);
11802     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11803                          OutputBecomesInput, Fn);
11804     break;
11805   case 16:
11806     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11807                          OutputBecomesInput, Fn);
11808     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11809                          OutputBecomesInput, Fn);
11810     break;
11811   case 32:
11812     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11813                          OutputBecomesInput, Fn);
11814     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11815                          OutputBecomesInput, Fn);
11816     break;
11817   case 64:
11818   case 128:
11819     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11820                          OutputBecomesInput, Fn);
11821     break;
11822   default:
11823     llvm_unreachable("Scalar type is too wide.");
11824   }
11825 }
11826 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates the user-provided 'simdlen' value (if any) against the target's
/// constraints, emitting a warning and no attributes when it is unusable,
/// then attaches the mangled vector-variant names to \p Fn.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data); // narrowest data size
  const unsigned WDS = std::get<1>(Data); // widest data size
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both unmasked and masked variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vector length ("x").
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11935 
11936 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11937                                               llvm::Function *Fn) {
11938   ASTContext &C = CGM.getContext();
11939   FD = FD->getMostRecentDecl();
11940   // Map params to their positions in function decl.
11941   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11942   if (isa<CXXMethodDecl>(FD))
11943     ParamPositions.try_emplace(FD, 0);
11944   unsigned ParamPos = ParamPositions.size();
11945   for (const ParmVarDecl *P : FD->parameters()) {
11946     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11947     ++ParamPos;
11948   }
11949   while (FD) {
11950     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11951       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11952       // Mark uniform parameters.
11953       for (const Expr *E : Attr->uniforms()) {
11954         E = E->IgnoreParenImpCasts();
11955         unsigned Pos;
11956         if (isa<CXXThisExpr>(E)) {
11957           Pos = ParamPositions[FD];
11958         } else {
11959           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11960                                 ->getCanonicalDecl();
11961           Pos = ParamPositions[PVD];
11962         }
11963         ParamAttrs[Pos].Kind = Uniform;
11964       }
11965       // Get alignment info.
11966       auto NI = Attr->alignments_begin();
11967       for (const Expr *E : Attr->aligneds()) {
11968         E = E->IgnoreParenImpCasts();
11969         unsigned Pos;
11970         QualType ParmTy;
11971         if (isa<CXXThisExpr>(E)) {
11972           Pos = ParamPositions[FD];
11973           ParmTy = E->getType();
11974         } else {
11975           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11976                                 ->getCanonicalDecl();
11977           Pos = ParamPositions[PVD];
11978           ParmTy = PVD->getType();
11979         }
11980         ParamAttrs[Pos].Alignment =
11981             (*NI)
11982                 ? (*NI)->EvaluateKnownConstInt(C)
11983                 : llvm::APSInt::getUnsigned(
11984                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11985                           .getQuantity());
11986         ++NI;
11987       }
11988       // Mark linear parameters.
11989       auto SI = Attr->steps_begin();
11990       auto MI = Attr->modifiers_begin();
11991       for (const Expr *E : Attr->linears()) {
11992         E = E->IgnoreParenImpCasts();
11993         unsigned Pos;
11994         // Rescaling factor needed to compute the linear parameter
11995         // value in the mangled name.
11996         unsigned PtrRescalingFactor = 1;
11997         if (isa<CXXThisExpr>(E)) {
11998           Pos = ParamPositions[FD];
11999         } else {
12000           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12001                                 ->getCanonicalDecl();
12002           Pos = ParamPositions[PVD];
12003           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12004             PtrRescalingFactor = CGM.getContext()
12005                                      .getTypeSizeInChars(P->getPointeeType())
12006                                      .getQuantity();
12007         }
12008         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12009         ParamAttr.Kind = Linear;
12010         // Assuming a stride of 1, for `linear` without modifiers.
12011         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12012         if (*SI) {
12013           Expr::EvalResult Result;
12014           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12015             if (const auto *DRE =
12016                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12017               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12018                 ParamAttr.Kind = LinearWithVarStride;
12019                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12020                     ParamPositions[StridePVD->getCanonicalDecl()]);
12021               }
12022             }
12023           } else {
12024             ParamAttr.StrideOrArg = Result.Val.getInt();
12025           }
12026         }
12027         // If we are using a linear clause on a pointer, we need to
12028         // rescale the value of linear_step with the byte size of the
12029         // pointee type.
12030         if (Linear == ParamAttr.Kind)
12031           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12032         ++SI;
12033         ++MI;
12034       }
12035       llvm::APSInt VLENVal;
12036       SourceLocation ExprLoc;
12037       const Expr *VLENExpr = Attr->getSimdlen();
12038       if (VLENExpr) {
12039         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12040         ExprLoc = VLENExpr->getExprLoc();
12041       }
12042       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12043       if (CGM.getTriple().isX86()) {
12044         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12045       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12046         unsigned VLEN = VLENVal.getExtValue();
12047         StringRef MangledName = Fn->getName();
12048         if (CGM.getTarget().hasFeature("sve"))
12049           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12050                                          MangledName, 's', 128, Fn, ExprLoc);
12051         if (CGM.getTarget().hasFeature("neon"))
12052           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12053                                          MangledName, 'n', 128, Fn, ExprLoc);
12054       }
12055     }
12056     FD = FD->getPreviousDecl();
12057   }
12058 }
12059 
12060 namespace {
12061 /// Cleanup action for doacross support.
12062 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12063 public:
12064   static const int DoacrossFinArgs = 2;
12065 
12066 private:
12067   llvm::FunctionCallee RTLFn;
12068   llvm::Value *Args[DoacrossFinArgs];
12069 
12070 public:
12071   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12072                     ArrayRef<llvm::Value *> CallArgs)
12073       : RTLFn(RTLFn) {
12074     assert(CallArgs.size() == DoacrossFinArgs);
12075     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12076   }
12077   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12078     if (!CGF.HaveInsertPoint())
12079       return;
12080     CGF.EmitRuntimeCall(RTLFn, Args);
12081   }
12082 };
12083 } // namespace
12084 
/// Emits doacross-loop initialization: builds a stack array of kmp_dim
/// descriptors (one per collapsed loop dimension), fills in upper bound and
/// stride for each dimension, calls __kmpc_doacross_init, and registers
/// __kmpc_doacross_fini as a normal-and-EH cleanup for the region.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // The record type was already built on a previous call; reuse it.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array, so 'lo' stays 0 for every dimension.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // The iteration count expression may have any integer type; widen/narrow
    // it to kmp_int64 before storing.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) to run on both normal and EH
  // exits from the region; location/thread id use the directive's end loc.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12155 
/// Emits an 'ordered depend(source)' / 'ordered depend(sink : ...)' construct
/// inside a doacross loop: stores the clause's loop counter values into a
/// temporary kmp_int64 array and calls __kmpc_doacross_post (source) or
/// __kmpc_doacross_wait (sink) with a pointer to that array.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store each loop's counter value, converted to kmp_int64, into the array.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Arguments: source location, global thread id, pointer to counter array.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    // depend(source): signal that this iteration's data is ready.
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    // depend(sink : vec): wait for the specified iteration.
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
12188 
12189 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12190                                llvm::FunctionCallee Callee,
12191                                ArrayRef<llvm::Value *> Args) const {
12192   assert(Loc.isValid() && "Outlined function call location must be valid.");
12193   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12194 
12195   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12196     if (Fn->doesNotThrow()) {
12197       CGF.EmitNounwindRuntimeCall(Fn, Args);
12198       return;
12199     }
12200   }
12201   CGF.EmitRuntimeCall(Callee, Args);
12202 }
12203 
/// Emits a call to an outlined OpenMP function; this base implementation
/// simply forwards to emitCall without adjusting the arguments.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12209 
12210 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12211   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12212     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12213       HasEmittedDeclareTargetRegion = true;
12214 }
12215 
/// Returns the address of \p NativeParam. The base implementation performs no
/// host/target parameter remapping, so \p TargetParam is ignored here.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12221 
12222 /// Return allocator value from expression, or return a null allocator (default
12223 /// when no allocator specified).
12224 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12225                                     const Expr *Allocator) {
12226   llvm::Value *AllocVal;
12227   if (Allocator) {
12228     AllocVal = CGF.EmitScalarExpr(Allocator);
12229     // According to the standard, the original allocator type is a enum
12230     // (integer). Convert to pointer type, if required.
12231     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12232                                         CGF.getContext().VoidPtrTy,
12233                                         Allocator->getExprLoc());
12234   } else {
12235     // If no allocator specified, it defaults to the null allocator.
12236     AllocVal = llvm::Constant::getNullValue(
12237         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12238   }
12239   return AllocVal;
12240 }
12241 
/// Returns the address to use for local variable \p VD, handling two special
/// cases: (1) locals saved in the current untied-task frame, whose stored
/// addresses are returned instead of a fresh alloca, and (2) decls with an
/// OMPAllocateDeclAttr, whose storage is obtained from
/// __kmpc_alloc/__kmpc_aligned_alloc and released via __kmpc_free through a
/// pushed cleanup. Returns Address::invalid() when neither case applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function has an untied-task frame that tracks VD, pick up
  // the (saved, real) address pair recorded there.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like type: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Fixed-size type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    // An explicit 'align' modifier selects the aligned allocation entry point.
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    // Argument order: gtid[, alignment], size, allocator.
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    // Cast the raw allocation to a pointer to the variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, persist the allocated pointer into the saved slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12343 
12344 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12345                                              const VarDecl *VD) const {
12346   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12347   if (It == FunctionToUntiedTaskStackMap.end())
12348     return false;
12349   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12350 }
12351 
/// Collects the decls named in the directive's 'nontemporal' clauses and
/// pushes them as a new set on NontemporalDeclsStack; the destructor pops it.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Nothing to do unless the directive actually has nontemporal clauses.
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        // Plain variable reference.
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be to a member of the current class.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
12377 
12378 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12379   if (!NeedToPush)
12380     return;
12381   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12382 }
12383 
/// Pushes a frame of untied-task local variables and maps the current
/// function to it, so getAddressOfLocalVariable can redirect accesses to the
/// saved addresses; the destructor pops the frame.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // Record which stack slot belongs to this function, then push the vars.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}
12395 
12396 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12397   if (!NeedToPush)
12398     return;
12399   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12400 }
12401 
12402 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12403   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12404 
12405   return llvm::any_of(
12406       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12407       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12408 }
12409 
12410 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12411     const OMPExecutableDirective &S,
12412     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12413     const {
12414   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12415   // Vars in target/task regions must be excluded completely.
12416   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12417       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12418     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12419     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12420     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12421     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12422       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12423         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12424     }
12425   }
12426   // Exclude vars in private clauses.
12427   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12428     for (const Expr *Ref : C->varlists()) {
12429       if (!Ref->getType()->isScalarType())
12430         continue;
12431       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12432       if (!DRE)
12433         continue;
12434       NeedToCheckForLPCs.insert(DRE->getDecl());
12435     }
12436   }
12437   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12438     for (const Expr *Ref : C->varlists()) {
12439       if (!Ref->getType()->isScalarType())
12440         continue;
12441       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12442       if (!DRE)
12443         continue;
12444       NeedToCheckForLPCs.insert(DRE->getDecl());
12445     }
12446   }
12447   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12448     for (const Expr *Ref : C->varlists()) {
12449       if (!Ref->getType()->isScalarType())
12450         continue;
12451       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12452       if (!DRE)
12453         continue;
12454       NeedToCheckForLPCs.insert(DRE->getDecl());
12455     }
12456   }
12457   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12458     for (const Expr *Ref : C->varlists()) {
12459       if (!Ref->getType()->isScalarType())
12460         continue;
12461       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12462       if (!DRE)
12463         continue;
12464       NeedToCheckForLPCs.insert(DRE->getDecl());
12465     }
12466   }
12467   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12468     for (const Expr *Ref : C->varlists()) {
12469       if (!Ref->getType()->isScalarType())
12470         continue;
12471       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12472       if (!DRE)
12473         continue;
12474       NeedToCheckForLPCs.insert(DRE->getDecl());
12475     }
12476   }
12477   for (const Decl *VD : NeedToCheckForLPCs) {
12478     for (const LastprivateConditionalData &Data :
12479          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12480       if (Data.DeclToUniqueName.count(VD) > 0) {
12481         if (!Data.Disabled)
12482           NeedToAddForLPCsAsDisabled.insert(VD);
12483         break;
12484       }
12485     }
12486   }
12487 }
12488 
/// If the directive has any 'lastprivate(conditional: ...)' clauses (OpenMP
/// 5.0+), pushes a LastprivateConditionalData frame mapping each listed decl
/// to a unique name; otherwise records DoNotPush and does nothing.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    // Only conditional lastprivate clauses participate.
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each listed decl to a unique "pl_cond" name used for the global
    // helper variables.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the iteration variable and the function owning this frame.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12520 
/// Analysis-disabling constructor (used by the disable() factory): computes
/// the decls in \p S that must be excluded from lastprivate conditional
/// analysis and, if any, pushes a frame marked Disabled listing them.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Lastprivate conditional requires OpenMP 5.0 or later.
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Disabled entries carry empty unique names.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12539 
/// Factory returning an RAII object that temporarily disables lastprivate
/// conditional analysis for decls referenced inside directive \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12545 
12546 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12547   if (CGM.getLangOpts().OpenMP < 50)
12548     return;
12549   if (Action == ActionToDo::DisableLastprivateConditional) {
12550     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12551            "Expected list of disabled private vars.");
12552     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12553   }
12554   if (Action == ActionToDo::PushAsLastprivateConditional) {
12555     assert(
12556         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12557         "Expected list of lastprivate conditional vars.");
12558     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12559   }
12560 }
12561 
/// Creates (or reuses, per function and per decl) a private helper struct for
/// lastprivate conditional variable \p VD:
///   struct { <VD's type> Val; char Fired; };
/// resets the 'Fired' flag to 0 and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the record and a stack temp.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate"; left
    // unchanged since renaming could affect emitted IR type names — confirm
    // before fixing.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached from a previous call in the same function.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0; marks the variable as not yet updated.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12596 
12597 namespace {
12598 /// Checks if the lastprivate conditional variable is referenced in LHS.
12599 class LastprivateConditionalRefChecker final
12600     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12601   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12602   const Expr *FoundE = nullptr;
12603   const Decl *FoundD = nullptr;
12604   StringRef UniqueDeclName;
12605   LValue IVLVal;
12606   llvm::Function *FoundFn = nullptr;
12607   SourceLocation Loc;
12608 
12609 public:
12610   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12611     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12612          llvm::reverse(LPM)) {
12613       auto It = D.DeclToUniqueName.find(E->getDecl());
12614       if (It == D.DeclToUniqueName.end())
12615         continue;
12616       if (D.Disabled)
12617         return false;
12618       FoundE = E;
12619       FoundD = E->getDecl()->getCanonicalDecl();
12620       UniqueDeclName = It->second;
12621       IVLVal = D.IVLVal;
12622       FoundFn = D.Fn;
12623       break;
12624     }
12625     return FoundE == E;
12626   }
12627   bool VisitMemberExpr(const MemberExpr *E) {
12628     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12629       return false;
12630     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12631          llvm::reverse(LPM)) {
12632       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12633       if (It == D.DeclToUniqueName.end())
12634         continue;
12635       if (D.Disabled)
12636         return false;
12637       FoundE = E;
12638       FoundD = E->getMemberDecl()->getCanonicalDecl();
12639       UniqueDeclName = It->second;
12640       IVLVal = D.IVLVal;
12641       FoundFn = D.Fn;
12642       break;
12643     }
12644     return FoundE == E;
12645   }
12646   bool VisitStmt(const Stmt *S) {
12647     for (const Stmt *Child : S->children()) {
12648       if (!Child)
12649         continue;
12650       if (const auto *E = dyn_cast<Expr>(Child))
12651         if (!E->isGLValue())
12652           continue;
12653       if (Visit(Child))
12654         return true;
12655     }
12656     return false;
12657   }
12658   explicit LastprivateConditionalRefChecker(
12659       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12660       : LPM(LPM) {}
12661   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12662   getFoundData() const {
12663     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12664   }
12665 };
12666 } // namespace
12667 
// Emits the conditional update of the global copy of a lastprivate
// conditional variable named \p UniqueDeclName: if the currently executed
// iteration (loaded from \p IVLVal) is not older than the last recorded one,
// store both the iteration number and the private value (\p LVal) into
// internal global variables. The compare-and-store is wrapped in a critical
// region named after the variable, except in OpenMP SIMD-only mode where no
// parallel region can exist and the guard is unnecessary.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  // Note: IVVal is captured by value — the iteration number is loaded once,
  // outside the critical region; the lvalues are captured by reference.
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick the signed or unsigned comparison based on the IV's source type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Only scalar and complex values are supported here; aggregates are
    // rejected earlier, hence the unreachable below.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12753 
12754 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12755                                                          const Expr *LHS) {
12756   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12757     return;
12758   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12759   if (!Checker.Visit(LHS))
12760     return;
12761   const Expr *FoundE;
12762   const Decl *FoundD;
12763   StringRef UniqueDeclName;
12764   LValue IVLVal;
12765   llvm::Function *FoundFn;
12766   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12767       Checker.getFoundData();
12768   if (FoundFn != CGF.CurFn) {
12769     // Special codegen for inner parallel regions.
12770     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12771     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12772     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12773            "Lastprivate conditional is not found in outer region.");
12774     QualType StructTy = std::get<0>(It->getSecond());
12775     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12776     LValue PrivLVal = CGF.EmitLValue(FoundE);
12777     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12778         PrivLVal.getAddress(CGF),
12779         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12780     LValue BaseLVal =
12781         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12782     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12783     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12784                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12785                         FiredLVal, llvm::AtomicOrdering::Unordered,
12786                         /*IsVolatile=*/true, /*isInit=*/false);
12787     return;
12788   }
12789 
12790   // Private address of the lastprivate conditional in the current context.
12791   // priv_a
12792   LValue LVal = CGF.EmitLValue(FoundE);
12793   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12794                                    FoundE->getExprLoc());
12795 }
12796 
12797 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12798     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12799     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12800   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12801     return;
12802   auto Range = llvm::reverse(LastprivateConditionalStack);
12803   auto It = llvm::find_if(
12804       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12805   if (It == Range.end() || It->Fn != CGF.CurFn)
12806     return;
12807   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12808   assert(LPCI != LastprivateConditionalToTypes.end() &&
12809          "Lastprivates must be registered already.");
12810   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12811   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12812   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12813   for (const auto &Pair : It->DeclToUniqueName) {
12814     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12815     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12816       continue;
12817     auto I = LPCI->getSecond().find(Pair.first);
12818     assert(I != LPCI->getSecond().end() &&
12819            "Lastprivate must be rehistered already.");
12820     // bool Cmp = priv_a.Fired != 0;
12821     LValue BaseLVal = std::get<3>(I->getSecond());
12822     LValue FiredLVal =
12823         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12824     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12825     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12826     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12827     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12828     // if (Cmp) {
12829     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12830     CGF.EmitBlock(ThenBB);
12831     Address Addr = CGF.GetAddrOfLocalVar(VD);
12832     LValue LVal;
12833     if (VD->getType()->isReferenceType())
12834       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12835                                            AlignmentSource::Decl);
12836     else
12837       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12838                                 AlignmentSource::Decl);
12839     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12840                                      D.getBeginLoc());
12841     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12842     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12843     // }
12844   }
12845 }
12846 
// Emits the final copy-back for a lastprivate conditional variable \p VD: at
// the end of the region, loads the last recorded value from the internal
// global (created lazily by emitLastprivateConditionalUpdate) and stores it
// into the private copy \p PrivLVal. If the global was never created, the
// variable was never updated inside the region and nothing is emitted.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // Lastprivate conditional is an OpenMP 5.0 feature.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12866 
// SIMD-only mode (-fopenmp-simd) never outlines parallel regions, so this
// must not be reached.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12872 
// Teams regions are not generated in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12878 
// Task regions are not generated in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12886 
// No parallel runtime calls exist in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12895 
// Critical regions require the runtime and are not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12902 
// Master regions are not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12908 
// Masked regions are not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12915 
// Taskyield has no meaning without the runtime; never reached here.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12920 
// Taskgroup regions are not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12926 
// Single regions are not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12934 
// Ordered regions are not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12941 
// Barriers require the runtime; never emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12949 
// Dynamic worksharing-loop scheduling is not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12956 
// Static worksharing-loop scheduling is not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12962 
// Distribute loop scheduling is not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12968 
// Ordered-iteration bookkeeping is not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12975 
// Static-loop finalization is not emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12981 
// Dynamic-loop chunk fetching is not emitted in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12989 
// num_threads has no runtime counterpart in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12995 
// proc_bind has no runtime counterpart in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13001 
// Threadprivate storage requires the runtime; unavailable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13008 
// Threadprivate definitions are not emitted in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13014 
// Artificial threadprivate storage is unavailable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13019 
// Flush requires the runtime; never emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13026 
// Tasks are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13035 
// Taskloops are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13042 
// In SIMD-only mode only "simple" reductions (no runtime interaction, direct
// application of the reduction ops) can occur; delegate those to the base
// class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13051 
// Task reductions require the runtime; unavailable in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13057 
// Task reductions require the runtime; unavailable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13063 
// Task reductions require the runtime; unavailable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13070 
// Task reductions require the runtime; unavailable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13077 
// Taskwait requires the runtime; never emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13083 
// Cancellation points require the runtime; unavailable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13089 
// Cancel requires the runtime; unavailable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13095 
// Target regions are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13102 
// Target offloading calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13112 
// Device-side function emission does not occur in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13116 
// Device-side global-variable emission does not occur in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13120 
// In SIMD-only mode no global ever needs device codegen, so always report
// "not handled" and let the common host emission proceed.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13124 
// Teams runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13132 
// num_teams/thread_limit have no runtime counterpart in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13139 
// Target data mapping is not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13145 
// Standalone target data directives are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13151 
// Doacross loops require the runtime; unavailable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13157 
// Doacross ordering requires the runtime; unavailable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13162 
// Parameter translation is a device-codegen concept; unavailable in
// SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13168 
// Parameter address translation is a device-codegen concept; unavailable in
// SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13175