1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include "llvm/Support/Format.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <numeric>
43 
44 using namespace clang;
45 using namespace CodeGen;
46 using namespace llvm::omp;
47 
48 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a region that has an associated captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used for inlined
  /// regions, which reuse the enclosing region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-scheduling point for untied tasks; default is a no-op for
  /// region kinds that do not support untied task switching.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this codegen region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the associated directive may contain a 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: every CGCapturedStmtInfo with kind CR_OpenMP is a
  /// CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
110 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must be
  ///        non-null (asserted below).
  /// \param HelperName Name used for the outlined capture helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: a CGOpenMPRegionInfo of kind ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined capture helper function.
  StringRef HelperName;
};
143 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the resume-point machinery for 'untied'
  /// tasks: a switch over the task part id dispatches to the scheduling
  /// point the task last suspended at. For tied tasks (Untied == false)
  /// every method is a no-op.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the current task part id.
    const VarDecl *PartIDVar;
    /// Extra codegen run at each scheduling point before re-suspending.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; one case is added per scheduling point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        // Load the current part id through the PartIDVar pointer parameter.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination: fall through to the function return.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 starts execution at the beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task scheduling point: record the next part id, run the
    /// user-supplied untied codegen, suspend (branch to return), and register
    /// the resume block as the next switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Store the index of the next case so a re-entered task resumes there.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        // Suspend: leave the task by branching to the return block.
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        // Resume block becomes the target of the newly added switch case.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward scheduling-point emission to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI: a CGOpenMPRegionInfo of kind TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
232 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing outlined region's
/// info (if any), since inlined regions have no captures of their own.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI The captured-statement info that was active before this
  ///        inlined region was entered; restored by the RAII owner.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE: delegates through the raw OldCSI rather than OuterRegionInfo, so
    // any enclosing captured-statement info (not only OpenMP ones) can answer.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Return the captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// RTTI: a CGOpenMPRegionInfo of kind InlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
315 
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Client-provided, application-unique name for the
  ///        outlined target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: a CGOpenMPRegionInfo of kind TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the outlined target region helper.
  StringRef HelperName;
};
344 
/// Placeholder RegionCodeGenTy callback for regions that never emit a body
/// (used by CGOpenMPInnerExprInfo below); reaching it is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need
      // privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      // Map the global to the address produced by evaluating a reference to
      // it in the current (captured) context.
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Never participates in RTTI-based dispatch.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
407 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo for the duration of the scope and, when
/// NoInheritance is set, also stashes lambda/block capture state so the
/// inlined region does not inherit it; everything is restored on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved copies of CGF state, restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, hide the enclosing lambda/block capture
  /// state from the inlined region for the lifetime of this object.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
450 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// (Deliberately shares the 0x40 value with OMP_IDENT_BARRIER_IMPL,
  /// mirroring kmp.h.)
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
479 
480 namespace {
481 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
498 
/// Reserved device IDs understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
504 } // anonymous namespace
505 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
546 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
578 
579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580 /// region.
581 class CleanupTy final : public EHScopeStack::Cleanup {
582   PrePostActionTy *Action;
583 
584 public:
585   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587     if (!CGF.HaveInsertPoint())
588       return;
589     Action->Exit(CGF);
590   }
591 };
592 
593 } // anonymous namespace
594 
595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596   CodeGenFunction::RunCleanupsScope Scope(CGF);
597   if (PrePostAction) {
598     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599     Callback(CodeGen, CGF, *PrePostAction);
600   } else {
601     PrePostActionTy Action;
602     Callback(CodeGen, CGF, Action);
603   }
604 }
605 
606 /// Check if the combiner is a call to UDR combiner and if it is so return the
607 /// UDR decl used for reduction.
608 static const OMPDeclareReductionDecl *
609 getReductionInit(const Expr *ReductionOp) {
610   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612       if (const auto *DRE =
613               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615           return DRD;
616   return nullptr;
617 }
618 
/// Emit initialization of a reduction private copy.
/// \param DRD User-defined reduction declaration; if it has an 'initializer'
///        clause the UDR init routine is invoked, otherwise the private copy
///        is zero-initialized from a null-constant global.
/// \param InitOp Call expression invoking the UDR initializer (only used when
///        DRD has an initializer).
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Reduction.second is the emitted initializer function for this UDR.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Bind the initializer's LHS to the private copy and RHS to the
    // original variable, then evaluate the call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the emitted initializer function for the opaque callee.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a zero value in a private constant
    // global and copy it into the private copy.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates: map an lvalue opaque expr to the global and emit the
      // copy directly into the private address.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex: store the loaded rvalue into the private copy.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
674 
/// Emit initialization of arrays of complex types.
/// Emits an element-by-element while-do loop over the destination array;
/// when a user-defined reduction is involved (\p DRD non-null) the source
/// array is walked in lock-step so each destination element can be
/// initialized from the corresponding source element.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
///        UDR initializer; otherwise evaluate \p Init into each element.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers around the
  // loop back-edge.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the "omp.arraycpy.dest.element" name on this src GEP
    // looks like a copy-pasted label; cosmetic only, IR names don't affect
    // semantics.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
764 
765 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
766   return CGF.EmitOMPSharedLValue(E);
767 }
768 
769 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
770                                             const Expr *E) {
771   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
772     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
773   return LValue();
774 }
775 
776 void ReductionCodeGen::emitAggregateInitialization(
777     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
778     const OMPDeclareReductionDecl *DRD) {
779   // Emit VarDecl with copy init for arrays.
780   // Get the address of the original variable captured in current
781   // captured region.
782   const auto *PrivateVD =
783       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
784   bool EmitDeclareReductionInit =
785       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
786   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
787                        EmitDeclareReductionInit,
788                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
789                                                 : PrivateVD->getInit(),
790                        DRD, SharedAddr);
791 }
792 
793 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
794                                    ArrayRef<const Expr *> Origs,
795                                    ArrayRef<const Expr *> Privates,
796                                    ArrayRef<const Expr *> ReductionOps) {
797   ClausesData.reserve(Shareds.size());
798   SharedAddresses.reserve(Shareds.size());
799   Sizes.reserve(Shareds.size());
800   BaseDecls.reserve(Shareds.size());
801   const auto *IOrig = Origs.begin();
802   const auto *IPriv = Privates.begin();
803   const auto *IRed = ReductionOps.begin();
804   for (const Expr *Ref : Shareds) {
805     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
806     std::advance(IOrig, 1);
807     std::advance(IPriv, 1);
808     std::advance(IRed, 1);
809   }
810 }
811 
812 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
813   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
814          "Number of generated lvalues must be exactly N.");
815   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
816   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
817   SharedAddresses.emplace_back(First, Second);
818   if (ClausesData[N].Shared == ClausesData[N].Ref) {
819     OrigAddresses.emplace_back(First, Second);
820   } else {
821     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
822     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
823     OrigAddresses.emplace_back(First, Second);
824   }
825 }
826 
// Computes and caches (in Sizes[N]) the size of reduction item N as a
// (size-in-chars, element-count) pair; the element count is only set for
// variably-modified private types. For those, the VLA size expression is also
// bound so the private type can be emitted with the computed bounds.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: the byte size is known statically and no dynamic
    // element count is required.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (UB - LB) + 1 from the pointers emitted
    // for the section bounds; byte size follows by multiplication.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably-modified object: take its byte size and divide by the
    // element size to recover the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while the
  // variably-modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
863 
864 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
865                                          llvm::Value *Size) {
866   const auto *PrivateVD =
867       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
868   QualType PrivateType = PrivateVD->getType();
869   if (!PrivateType->isVariablyModifiedType()) {
870     assert(!Size && !Sizes[N].second &&
871            "Size should be nullptr for non-variably modified reduction "
872            "items.");
873     return;
874   }
875   CodeGenFunction::OpaqueValueMapping OpaqueMap(
876       CGF,
877       cast<OpaqueValueExpr>(
878           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
879       RValue::get(Size));
880   CGF.EmitVariablyModifiedType(PrivateType);
881 }
882 
// Emits the initialization of the private copy of reduction item N.
// Chooses between: (a) aggregate initialization for array types, (b) the
// user-defined reduction (UDR) initializer for scalars when available, and
// (c) the private variable's own initializer when DefaultInit did not already
// handle the item.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype the destination as the private copy's memory type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element-wise; run DefaultInit first when the
    // UDR provides an explicit initializer.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar backed by a UDR initializer (or with no private init at all).
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // DefaultInit reported it did not emit one.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
910 
911 bool ReductionCodeGen::needCleanups(unsigned N) {
912   const auto *PrivateVD =
913       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914   QualType PrivateType = PrivateVD->getType();
915   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
916   return DTorKind != QualType::DK_none;
917 }
918 
919 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
920                                     Address PrivateAddr) {
921   const auto *PrivateVD =
922       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
923   QualType PrivateType = PrivateVD->getType();
924   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
925   if (needCleanups(N)) {
926     PrivateAddr = CGF.Builder.CreateElementBitCast(
927         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
928     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
929   }
930 }
931 
// Walks BaseLV through the pointer/reference indirections of BaseTy, loading
// through each level, until the pointee type matches ElTy; returns the final
// lvalue recast to ElTy's memory representation while preserving BaseLV's
// base and TBAA info.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: load the referenced address.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
951 
// Rebuilds the pointer/reference indirection chain of BaseTy around Addr:
// one stack temporary is created per indirection level and chained together
// with stores, so loading through the returned address the same number of
// times yields Addr (cast to the innermost level's type). With no
// indirection, Addr is returned directly with the given alignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary created
  Address TopTmp = Address::invalid();     // temporary from the previous round
  Address MostTopTmp = Address::invalid(); // outermost temporary (the result)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary and hand back
    // the head of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
979 
980 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
981   const VarDecl *OrigVD = nullptr;
982   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
983     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
984     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
985       Base = TempOASE->getBase()->IgnoreParenImpCasts();
986     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987       Base = TempASE->getBase()->IgnoreParenImpCasts();
988     DE = cast<DeclRefExpr>(Base);
989     OrigVD = cast<VarDecl>(DE->getDecl());
990   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
991     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
992     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
993       Base = TempASE->getBase()->IgnoreParenImpCasts();
994     DE = cast<DeclRefExpr>(Base);
995     OrigVD = cast<VarDecl>(DE->getDecl());
996   }
997   return OrigVD;
998 }
999 
// For array-section/subscript reduction items, adjusts the private address so
// that applying the original base expression's indexing to it reaches the
// privatized elements: the private pointer is offset by the (possibly
// negative) distance from the shared section start back to the base, and the
// base's indirection chain is then rebuilt around it. Other items are
// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Emit the base lvalue and strip indirections down to the element type.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Distance (in elements) from the section start back to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    // Wrap the adjusted pointer in the base's pointer/reference layers.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1026 
1027 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1028   const OMPDeclareReductionDecl *DRD =
1029       getReductionInit(ClausesData[N].ReductionOp);
1030   return DRD && DRD->getInitializer();
1031 }
1032 
1033 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1034   return CGF.EmitLoadOfPointerLValue(
1035       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1036       getThreadIDVariable()->getType()->castAs<PointerType>());
1037 }
1038 
// Emits the structured block of the OpenMP region inside a terminate scope:
// any exception that tries to escape the block terminates the program, per
// the OpenMP structured-block rules quoted below.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1053 
1054 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1055     CodeGenFunction &CGF) {
1056   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1057                             getThreadIDVariable()->getType(),
1058                             AlignmentSource::Decl);
1059 }
1060 
1061 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1062                                        QualType FieldTy) {
1063   auto *Field = FieldDecl::Create(
1064       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1065       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1066       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1067   Field->setAccess(AS_public);
1068   DC->addDecl(Field);
1069   return Field;
1070 }
1071 
// Sets up runtime-wide state: the kmp_critical_name type (an array of 8 i32),
// the types used by the OpenMPIRBuilder, and any offloading metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1082 
1083 void CGOpenMPRuntime::clear() {
1084   InternalVars.clear();
1085   // Clean non-target variable declarations possibly used only in debug info.
1086   for (const auto &Data : EmittedNonTargetVariables) {
1087     if (!Data.getValue().pointsToAliveValue())
1088       continue;
1089     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1090     if (!GV)
1091       continue;
1092     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1093       continue;
1094     GV->eraseFromParent();
1095   }
1096 }
1097 
1098 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1099   SmallString<128> Buffer;
1100   llvm::raw_svector_ostream OS(Buffer);
1101   StringRef Sep = FirstSeparator;
1102   for (StringRef Part : Parts) {
1103     OS << Sep << Part;
1104     Sep = Separator;
1105   }
1106   return std::string(OS.str());
1107 }
1108 
// Emits the outlined ".omp_combiner."/".omp_initializer." helper for a
// user-defined reduction: an internal void(Ty*, Ty*) function. The UDR's
// 'in'/'out' (or 'orig'/'priv') variables are privatized to alias the two
// parameters before the combiner/initializer expression is emitted.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  // The parameters never alias; mark them restrict.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force-inline them under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For an initializer without a call-style init expression, emit the default
  // initialization of the 'priv' variable (Out) instead.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1165 
// Emits (at most once per decl) the combiner and optional initializer helper
// functions for a user-defined reduction and caches them in UDRMap. When
// emitted inside a function, the decl is also tracked per-function via
// FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a call-style initializer is emitted as an expression; otherwise
    // the helper falls back to the 'priv' variable's own initialization.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1191 
1192 std::pair<llvm::Function *, llvm::Function *>
1193 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1194   auto I = UDRMap.find(D);
1195   if (I != UDRMap.end())
1196     return I->second;
1197   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1198   return UDRMap.lookup(D);
1199 }
1200 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for the given directive kind onto the
  // OpenMPIRBuilder's stack (if a builder was provided); the callback routes
  // cancellation back through clang's cleanup machinery.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    // NOTE(review): FiniCB captures CGF by reference and outlives this scope
    // inside the builder; it must only run while CGF is alive, i.e. before
    // this RAII object pops it.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pops the callback pushed by the constructor (no-op without a builder).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no OpenMPIRBuilder is in use.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1245 
// Shared implementation for outlining 'parallel' and 'teams' regions: builds
// a region-info object describing the captured statement and generates the
// outlined function through the captured-statement machinery.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the directive may contain a 'cancel' construct; each
  // parallel-containing directive class is checked explicitly (the dyn_casts
  // are mutually exclusive, so at most one branch fires).
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1282 
1283 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1284     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1285     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1286   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1287   return emitParallelOrTeamsOutlinedFunction(
1288       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1289 }
1290 
1291 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1292     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1294   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1295   return emitParallelOrTeamsOutlinedFunction(
1296       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1297 }
1298 
// Outlines the body of a task/taskloop directive. For untied tasks, an action
// is installed whose codegen re-enqueues the task (__kmpc_omp_task) at
// task-part boundaries, and the number of generated parts is reported back
// through NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Untied-task rescheduling: call __kmpc_omp_task with the current task
  // descriptor so the remaining parts run later.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether the directive kind may contain a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1345 
1346 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1347                              const RecordDecl *RD, const CGRecordLayout &RL,
1348                              ArrayRef<llvm::Constant *> Data) {
1349   llvm::StructType *StructTy = RL.getLLVMType();
1350   unsigned PrevIdx = 0;
1351   ConstantInitBuilder CIBuilder(CGM);
1352   auto DI = Data.begin();
1353   for (const FieldDecl *FD : RD->fields()) {
1354     unsigned Idx = RL.getLLVMFieldNo(FD);
1355     // Fill the alignment.
1356     for (unsigned I = PrevIdx; I < Idx; ++I)
1357       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1358     PrevIdx = Idx + 1;
1359     Fields.add(*DI);
1360     ++DI;
1361   }
1362 }
1363 
1364 template <class... As>
1365 static llvm::GlobalVariable *
1366 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1367                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1368                    As &&... Args) {
1369   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1370   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1371   ConstantInitBuilder CIBuilder(CGM);
1372   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1373   buildStructValue(Fields, CGM, RD, RL, Data);
1374   return Fields.finishAndCreateGlobal(
1375       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1376       std::forward<As>(Args)...);
1377 }
1378 
1379 template <typename T>
1380 static void
1381 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1382                                          ArrayRef<llvm::Constant *> Data,
1383                                          T &Parent) {
1384   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1385   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1386   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1387   buildStructValue(Fields, CGM, RD, RL, Data);
1388   Fields.finishAndAddTo(Parent);
1389 }
1390 
1391 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1392                                              bool AtCurrentPoint) {
1393   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1394   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1395 
1396   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1397   if (AtCurrentPoint) {
1398     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1399         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1400   } else {
1401     Elem.second.ServiceInsertPt =
1402         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1403     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1404   }
1405 }
1406 
1407 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1408   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1409   if (Elem.second.ServiceInsertPt) {
1410     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1411     Elem.second.ServiceInsertPt = nullptr;
1412     Ptr->eraseFromParent();
1413   }
1414 }
1415 
1416 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1417                                                   SourceLocation Loc,
1418                                                   SmallString<128> &Buffer) {
1419   llvm::raw_svector_ostream OS(Buffer);
1420   // Build debug location
1421   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1422   OS << ";" << PLoc.getFilename() << ";";
1423   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1424     OS << FD->getQualifiedNameAsString();
1425   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1426   return OS.str();
1427 }
1428 
1429 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1430                                                  SourceLocation Loc,
1431                                                  unsigned Flags) {
1432   uint32_t SrcLocStrSize;
1433   llvm::Constant *SrcLocStr;
1434   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1435       Loc.isInvalid()) {
1436     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1437   } else {
1438     std::string FunctionName;
1439     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1440       FunctionName = FD->getQualifiedNameAsString();
1441     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1442     const char *FileName = PLoc.getFilename();
1443     unsigned Line = PLoc.getLine();
1444     unsigned Column = PLoc.getColumn();
1445     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1446                                                 Column, SrcLocStrSize);
1447   }
1448   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1449   return OMPBuilder.getOrCreateIdent(
1450       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1451 }
1452 
/// Return the OpenMP thread id for the current function, emitting a
/// __kmpc_global_thread_num call (cached per function) when it cannot be
/// obtained from an outlined region's thread-id parameter.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the parameter when a load here cannot be skipped by
      // exceptional control flow: either no landing pads are required, or the
      // load happens in the entry block / the same block as the variable's
      // defining instruction.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (entry block) so the
  // cached value dominates all uses; the guard restores the insert point.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1521 
1522 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1523   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1524   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1525     clearLocThreadIdInsertPt(CGF);
1526     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1527   }
1528   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1529     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1530       UDRMap.erase(D);
1531     FunctionUDRMap.erase(CGF.CurFn);
1532   }
1533   auto I = FunctionUDMMap.find(CGF.CurFn);
1534   if (I != FunctionUDMMap.end()) {
1535     for(const auto *D : I->second)
1536       UDMMap.erase(D);
1537     FunctionUDMMap.erase(I);
1538   }
1539   LastprivateConditionalToTypes.erase(CGF.CurFn);
1540   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1541 }
1542 
/// Return the ident_t * type; the type itself is owned and cached by the
/// shared OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1546 
1547 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1548   if (!Kmpc_MicroTy) {
1549     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1550     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1551                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1552     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1553   }
1554   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1555 }
1556 
1557 llvm::FunctionCallee
1558 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1559                                              bool IsGPUDistribute) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name;
1563   if (IsGPUDistribute)
1564     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1565                                     : "__kmpc_distribute_static_init_4u")
1566                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1567                                     : "__kmpc_distribute_static_init_8u");
1568   else
1569     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1570                                     : "__kmpc_for_static_init_4u")
1571                         : (IVSigned ? "__kmpc_for_static_init_8"
1572                                     : "__kmpc_for_static_init_8u");
1573 
1574   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1575   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1576   llvm::Type *TypeParams[] = {
1577     getIdentTyPointerTy(),                     // loc
1578     CGM.Int32Ty,                               // tid
1579     CGM.Int32Ty,                               // schedtype
1580     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1581     PtrTy,                                     // p_lower
1582     PtrTy,                                     // p_upper
1583     PtrTy,                                     // p_stride
1584     ITy,                                       // incr
1585     ITy                                        // chunk
1586   };
1587   auto *FnTy =
1588       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1589   return CGM.CreateRuntimeFunction(FnTy, Name);
1590 }
1591 
1592 llvm::FunctionCallee
1593 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1594   assert((IVSize == 32 || IVSize == 64) &&
1595          "IV size is not compatible with the omp runtime");
1596   StringRef Name =
1597       IVSize == 32
1598           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1599           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1600   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1601   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1602                                CGM.Int32Ty,           // tid
1603                                CGM.Int32Ty,           // schedtype
1604                                ITy,                   // lower
1605                                ITy,                   // upper
1606                                ITy,                   // stride
1607                                ITy                    // chunk
1608   };
1609   auto *FnTy =
1610       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1611   return CGM.CreateRuntimeFunction(FnTy, Name);
1612 }
1613 
1614 llvm::FunctionCallee
1615 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1616   assert((IVSize == 32 || IVSize == 64) &&
1617          "IV size is not compatible with the omp runtime");
1618   StringRef Name =
1619       IVSize == 32
1620           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1621           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1622   llvm::Type *TypeParams[] = {
1623       getIdentTyPointerTy(), // loc
1624       CGM.Int32Ty,           // tid
1625   };
1626   auto *FnTy =
1627       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1628   return CGM.CreateRuntimeFunction(FnTy, Name);
1629 }
1630 
1631 llvm::FunctionCallee
1632 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1633   assert((IVSize == 32 || IVSize == 64) &&
1634          "IV size is not compatible with the omp runtime");
1635   StringRef Name =
1636       IVSize == 32
1637           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1638           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1639   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1640   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1641   llvm::Type *TypeParams[] = {
1642     getIdentTyPointerTy(),                     // loc
1643     CGM.Int32Ty,                               // tid
1644     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1645     PtrTy,                                     // p_lower
1646     PtrTy,                                     // p_upper
1647     PtrTy                                      // p_stride
1648   };
1649   auto *FnTy =
1650       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1651   return CGM.CreateRuntimeFunction(FnTy, Name);
1652 }
1653 
1654 /// Obtain information that uniquely identifies a target entry. This
1655 /// consists of the file and device IDs as well as line number associated with
1656 /// the relevant entry source location.
1657 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1658                                      unsigned &DeviceID, unsigned &FileID,
1659                                      unsigned &LineNum) {
1660   SourceManager &SM = C.getSourceManager();
1661 
1662   // The loc should be always valid and have a file ID (the user cannot use
1663   // #pragma directives in macros)
1664 
1665   assert(Loc.isValid() && "Source location is expected to be always valid.");
1666 
1667   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1668   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1669 
1670   llvm::sys::fs::UniqueID ID;
1671   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1672     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1673     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1675       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1676           << PLoc.getFilename() << EC.message();
1677   }
1678 
1679   DeviceID = ID.getDevice();
1680   FileID = ID.getFile();
1681   LineNum = PLoc.getLine();
1682 }
1683 
/// Return the address of the reference pointer created for a 'declare target
/// link' variable (or a 'to' variable under required unified shared memory),
/// creating the pointer global lazily on first use. Returns an invalid
/// address when no indirection is needed.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // With -fopenmp-simd no offloading machinery is generated at all.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Name: "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr"; the file id is only
    // appended for internal-linkage variables to disambiguate across TUs.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the pointer global and register it with the
      // offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only host compiles know the variable's address; device compiles leave
      // the pointer to be resolved at runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1722 
1723 llvm::Constant *
1724 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1725   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1726          !CGM.getContext().getTargetInfo().isTLSSupported());
1727   // Lookup the entry, lazily creating it if necessary.
1728   std::string Suffix = getName({"cache", ""});
1729   return getOrCreateInternalVariable(
1730       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1731 }
1732 
1733 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1734                                                 const VarDecl *VD,
1735                                                 Address VDAddr,
1736                                                 SourceLocation Loc) {
1737   if (CGM.getLangOpts().OpenMPUseTLS &&
1738       CGM.getContext().getTargetInfo().isTLSSupported())
1739     return VDAddr;
1740 
1741   llvm::Type *VarTy = VDAddr.getElementType();
1742   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1743                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1744                                                        CGM.Int8PtrTy),
1745                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1746                          getOrCreateThreadPrivateCache(VD)};
1747   return Address(CGF.EmitRuntimeCall(
1748                      OMPBuilder.getOrCreateRuntimeFunction(
1749                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1750                      Args),
1751                  VDAddr.getAlignment());
1752 }
1753 
1754 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1755     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1756     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1757   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1758   // library.
1759   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1760   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1761                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1762                       OMPLoc);
1763   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1764   // to register constructor/destructor for variable.
1765   llvm::Value *Args[] = {
1766       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1767       Ctor, CopyCtor, Dtor};
1768   CGF.EmitRuntimeCall(
1769       OMPBuilder.getOrCreateRuntimeFunction(
1770           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1771       Args);
1772 }
1773 
/// Emit the constructor/copy-constructor/destructor registration for a
/// threadprivate variable and, when no CodeGenFunction is supplied, a
/// standalone global init function that performs the registration. Returns
/// that init function, or nullptr when nothing needs to be emitted (native
/// TLS, no definition, or no ctor/dtor required).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS the variable needs no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only emit once per variable definition across the module.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Signature: void *ctor(void *dst) — dst is the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer expression on the destination copy.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The runtime contract returns the destination pointer back.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Signature: void dtor(void *dst).
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Pass a typed null when no constructor is needed.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Pass a typed null when no destructor is needed.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a dedicated global init function
      // that performs the registration and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise register inline in the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1893 
/// Emit and register offload ctor/dtor entries for a 'declare target'
/// variable definition. On the device the actual ctor/dtor functions are
/// generated; on the host only placeholder globals are created so the entry
/// tables on host and device stay in sync. Returns true iff compiling for the
/// device (i.e. the caller should skip the regular host emission).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do unless some target triple is involved.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference-pointer mechanism instead.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit only once per variable definition.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Run the variable's initializer directly on the device global.
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a placeholder global whose address identifies the entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a placeholder global whose address identifies the entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2008 
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable identified by \p Name. Uses a native TLS global when the target
/// supports it, otherwise falls back to __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // The backing global is shared by all requests with the same Name.
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // Fast path: mark the global thread_local and return it directly.
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Slow path: ask the runtime for this thread's copy, using a dedicated
  // cache variable derived from the same name.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2040 
2041 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2042                                    const RegionCodeGenTy &ThenGen,
2043                                    const RegionCodeGenTy &ElseGen) {
2044   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2045 
2046   // If the condition constant folds and can be elided, try to avoid emitting
2047   // the condition and the dead arm of the if/else.
2048   bool CondConstant;
2049   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2050     if (CondConstant)
2051       ThenGen(CGF);
2052     else
2053       ElseGen(CGF);
2054     return;
2055   }
2056 
2057   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2058   // emit the conditional branch.
2059   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2060   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2061   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2062   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2063 
2064   // Emit the 'then' code.
2065   CGF.EmitBlock(ThenBlock);
2066   ThenGen(CGF);
2067   CGF.EmitBranch(ContBlock);
2068   // Emit the 'else' code if present.
2069   // There is no need to emit line number for unconditional branch.
2070   (void)ApplyDebugLocation::CreateEmpty(CGF);
2071   CGF.EmitBlock(ElseBlock);
2072   ElseGen(CGF);
2073   // There is no need to emit line number for unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBranch(ContBlock);
2076   // Emit the continuation block for code after the if.
2077   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2078 }
2079 
// Emits a 'parallel' region: either a call to __kmpc_fork_call that runs
// \p OutlinedFn on the team's threads, or - when the if-clause (\p IfCond)
// evaluates to false - a serialized execution of the outlined function on the
// encountering thread only.
// NOTE(review): NumThreads is not referenced in this body - presumably it is
// consumed by overriding/device implementations; confirm against callers.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel case: hand OutlinedFn plus the captured variables to the runtime
  // fork entry point.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized case: call OutlinedFn directly on this thread between
  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if-clause, branch between the two at runtime (or fold the
  // condition); otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2151 
2152 // If we're inside an (outlined) parallel region, use the region info's
2153 // thread-ID variable (it is passed in a first argument of the outlined function
2154 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2155 // regular serial code region, get thread ID by calling kmp_int32
2156 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2157 // return the address of that temp.
2158 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2159                                              SourceLocation Loc) {
2160   if (auto *OMPRegionInfo =
2161           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2162     if (OMPRegionInfo->getThreadIDVariable())
2163       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2164 
2165   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2166   QualType Int32Ty =
2167       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2168   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2169   CGF.EmitStoreOfScalar(ThreadID,
2170                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2171 
2172   return ThreadIDTemp;
2173 }
2174 
2175 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2176     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2177   SmallString<256> Buffer;
2178   llvm::raw_svector_ostream Out(Buffer);
2179   Out << Name;
2180   StringRef RuntimeName = Out.str();
2181   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2182   if (Elem.second) {
2183     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2184            "OMP internal variable has different type than requested");
2185     return &*Elem.second;
2186   }
2187 
2188   return Elem.second = new llvm::GlobalVariable(
2189              CGM.getModule(), Ty, /*IsConstant*/ false,
2190              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2191              Elem.first(), /*InsertBefore=*/nullptr,
2192              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2193 }
2194 
2195 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2196   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2197   std::string Name = getName({Prefix, "var"});
2198   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2199 }
2200 
2201 namespace {
2202 /// Common pre(post)-action for different OpenMP constructs.
2203 class CommonActionTy final : public PrePostActionTy {
2204   llvm::FunctionCallee EnterCallee;
2205   ArrayRef<llvm::Value *> EnterArgs;
2206   llvm::FunctionCallee ExitCallee;
2207   ArrayRef<llvm::Value *> ExitArgs;
2208   bool Conditional;
2209   llvm::BasicBlock *ContBlock = nullptr;
2210 
2211 public:
2212   CommonActionTy(llvm::FunctionCallee EnterCallee,
2213                  ArrayRef<llvm::Value *> EnterArgs,
2214                  llvm::FunctionCallee ExitCallee,
2215                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2216       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2217         ExitArgs(ExitArgs), Conditional(Conditional) {}
2218   void Enter(CodeGenFunction &CGF) override {
2219     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2220     if (Conditional) {
2221       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2222       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2223       ContBlock = CGF.createBasicBlock("omp_if.end");
2224       // Generate the branch (If-stmt)
2225       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2226       CGF.EmitBlock(ThenBlock);
2227     }
2228   }
2229   void Done(CodeGenFunction &CGF) {
2230     // Emit the rest of blocks/branches
2231     CGF.EmitBranch(ContBlock);
2232     CGF.EmitBlock(ContBlock, true);
2233   }
2234   void Exit(CodeGenFunction &CGF) override {
2235     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2236   }
2237 };
2238 } // anonymous namespace
2239 
// Emits a 'critical' region guarded by the named lock, optionally passing an
// OpenMP 'hint' value to the runtime.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  // These three arguments are shared by the enter and exit calls.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is only passed to the enter call, widened/truncated to u32.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  // Bracket the region body with the enter/exit runtime calls; the hinted
  // variant is selected only when a hint expression is present.
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2269 
// Emits a 'master' region: the body runs only on the thread for which
// __kmpc_master returns non-zero.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body is wrapped in a branch on the result
  // of __kmpc_master, hence the Action.Done() call below.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Emit the continuation block that closes the conditional region.
  Action.Done(CGF);
}
2292 
// Emits a 'masked' region: the body runs only on threads selected by the
// filter expression (thread 0 when no filter is given).
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  // Without a filter clause the filter defaults to thread 0.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call takes no filter argument.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  // Conditional action: the region body is wrapped in a branch on the result
  // of __kmpc_masked, hence the Action.Done() call below.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Emit the continuation block that closes the conditional region.
  Action.Done(CGF);
}
2321 
2322 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2323                                         SourceLocation Loc) {
2324   if (!CGF.HaveInsertPoint())
2325     return;
2326   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2327     OMPBuilder.createTaskyield(CGF.Builder);
2328   } else {
2329     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2330     llvm::Value *Args[] = {
2331         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2332         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2333     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2334                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2335                         Args);
2336   }
2337 
2338   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2339     Region->emitUntiedSwitch(CGF);
2340 }
2341 
2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2343                                           const RegionCodeGenTy &TaskgroupOpGen,
2344                                           SourceLocation Loc) {
2345   if (!CGF.HaveInsertPoint())
2346     return;
2347   // __kmpc_taskgroup(ident_t *, gtid);
2348   // TaskgroupOpGen();
2349   // __kmpc_end_taskgroup(ident_t *, gtid);
2350   // Prepare arguments and build a call to __kmpc_taskgroup
2351   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2352   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2353                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2354                         Args,
2355                         OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2357                         Args);
2358   TaskgroupOpGen.setAction(Action);
2359   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2360 }
2361 
2362 /// Given an array of pointers to variables, project the address of a
2363 /// given variable.
2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365                                       unsigned Index, const VarDecl *Var) {
2366   // Pull out the pointer to the variable.
2367   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369 
2370   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2371   Addr = CGF.Builder.CreateElementBitCast(
2372       Addr, CGF.ConvertTypeForMem(Var->getType()));
2373   return Addr;
2374 }
2375 
/// Emits an internal helper "void copy_func(void *LHSArg, void *RHSArg)"
/// where both arguments point to arrays of void* (one slot per copyprivate
/// variable); the helper copies each RHS element to the matching LHS element
/// using the pre-built assignment expressions in \p AssignmentOps. Returns
/// the created llvm::Function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // The helper gets internal linkage and a uniqued ".omp.copyprivate.copy_func"
  // style name.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper's body with a dedicated CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Project the I-th pointer out of each array, typed as the variable it
    // refers to, then copy via the matching assignment expression.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2429 
// Emits a 'single' region: only one thread executes SingleOpGen. When there
// are copyprivate variables, the executing thread sets did_it = 1 and its
// values are broadcast to the other threads via __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four arrays are parallel: one src/dst/assignment-op entry per
  // copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body runs only on the thread for which
  // __kmpc_single returns non-zero; Action.Done() below closes the branch.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional region, so only the
    // executing thread sets it)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is forwarded into the callee's DestExprs slot and
    // DstExprs into its SrcExprs slot - the naming looks swapped; confirm the
    // intended copy direction before touching either side.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2517 
2518 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2519                                         const RegionCodeGenTy &OrderedOpGen,
2520                                         SourceLocation Loc, bool IsThreads) {
2521   if (!CGF.HaveInsertPoint())
2522     return;
2523   // __kmpc_ordered(ident_t *, gtid);
2524   // OrderedOpGen();
2525   // __kmpc_end_ordered(ident_t *, gtid);
2526   // Prepare arguments and build a call to __kmpc_ordered
2527   if (IsThreads) {
2528     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2529     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2530                               CGM.getModule(), OMPRTL___kmpc_ordered),
2531                           Args,
2532                           OMPBuilder.getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2534                           Args);
2535     OrderedOpGen.setAction(Action);
2536     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2537     return;
2538   }
2539   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540 }
2541 
2542 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2543   unsigned Flags;
2544   if (Kind == OMPD_for)
2545     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2546   else if (Kind == OMPD_sections)
2547     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2548   else if (Kind == OMPD_single)
2549     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2550   else if (Kind == OMPD_barrier)
2551     Flags = OMP_IDENT_BARRIER_EXPL;
2552   else
2553     Flags = OMP_IDENT_BARRIER_IMPL;
2554   return Flags;
2555 }
2556 
2557 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2558     CodeGenFunction &CGF, const OMPLoopDirective &S,
2559     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2560   // Check if the loop directive is actually a doacross loop directive. In this
2561   // case choose static, 1 schedule.
2562   if (llvm::any_of(
2563           S.getClausesOfKind<OMPOrderedClause>(),
2564           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2565     ScheduleKind = OMPC_SCHEDULE_static;
2566     // Chunk size is 1 in this case.
2567     llvm::APInt ChunkSize(32, 1);
2568     ChunkExpr = IntegerLiteral::Create(
2569         CGF.getContext(), ChunkSize,
2570         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2571         SourceLocation());
2572   }
2573 }
2574 
// Emits a barrier at the current insertion point, either through the
// OpenMPIRBuilder or as a call to __kmpc_barrier. Inside a cancellable region
// (unless ForceSimpleCall) __kmpc_cancel_barrier is used instead, and with
// EmitChecks its result triggers a branch to the region's cancel destination.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The directive kind selects the barrier flags encoded into the ident_t.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Branch through any pending cleanups to the region's cancel
        // destination.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2624 
2625 /// Map the OpenMP loop schedule to the runtime enumeration.
2626 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2627                                           bool Chunked, bool Ordered) {
2628   switch (ScheduleKind) {
2629   case OMPC_SCHEDULE_static:
2630     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2631                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2632   case OMPC_SCHEDULE_dynamic:
2633     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2634   case OMPC_SCHEDULE_guided:
2635     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2636   case OMPC_SCHEDULE_runtime:
2637     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2638   case OMPC_SCHEDULE_auto:
2639     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2640   case OMPC_SCHEDULE_unknown:
2641     assert(!Chunked && "chunk was specified but schedule kind not known");
2642     return Ordered ? OMP_ord_static : OMP_sch_static;
2643   }
2644   llvm_unreachable("Unexpected runtime schedule");
2645 }
2646 
2647 /// Map the OpenMP distribute schedule to the runtime enumeration.
2648 static OpenMPSchedType
2649 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2650   // only static is allowed for dist_schedule
2651   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2652 }
2653 
2654 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2655                                          bool Chunked) const {
2656   OpenMPSchedType Schedule =
2657       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2658   return Schedule == OMP_sch_static;
2659 }
2660 
2661 bool CGOpenMPRuntime::isStaticNonchunked(
2662     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2663   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2664   return Schedule == OMP_dist_sch_static;
2665 }
2666 
2667 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2668                                       bool Chunked) const {
2669   OpenMPSchedType Schedule =
2670       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2671   return Schedule == OMP_sch_static_chunked;
2672 }
2673 
2674 bool CGOpenMPRuntime::isStaticChunked(
2675     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2676   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2677   return Schedule == OMP_dist_sch_static_chunked;
2678 }
2679 
2680 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2681   OpenMPSchedType Schedule =
2682       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2683   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2684   return Schedule != OMP_sch_static;
2685 }
2686 
/// Combine the runtime schedule value with the schedule-clause modifiers into
/// the single integer the runtime expects (modifier bits OR'ed into the
/// schedule enumeration). The 'simd' modifier rewrites chunked static to the
/// balanced-chunked variant; if both M1 and M2 carry a monotonicity modifier,
/// M2's assignment overwrites M1's.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  // First modifier position.
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // Second modifier position.
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
2737 
2738 void CGOpenMPRuntime::emitForDispatchInit(
2739     CodeGenFunction &CGF, SourceLocation Loc,
2740     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2741     bool Ordered, const DispatchRTInput &DispatchValues) {
2742   if (!CGF.HaveInsertPoint())
2743     return;
2744   OpenMPSchedType Schedule = getRuntimeSchedule(
2745       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2746   assert(Ordered ||
2747          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2748           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2749           Schedule != OMP_sch_static_balanced_chunked));
2750   // Call __kmpc_dispatch_init(
2751   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2752   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2753   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2754 
2755   // If the Chunk was not specified in the clause - use default value 1.
2756   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2757                                             : CGF.Builder.getIntN(IVSize, 1);
2758   llvm::Value *Args[] = {
2759       emitUpdateLocation(CGF, Loc),
2760       getThreadID(CGF, Loc),
2761       CGF.Builder.getInt32(addMonoNonMonoModifier(
2762           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2763       DispatchValues.LB,                                     // Lower
2764       DispatchValues.UB,                                     // Upper
2765       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2766       Chunk                                                  // Chunk
2767   };
2768   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2769 }
2770 
2771 static void emitForStaticInitCall(
2772     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2773     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2774     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2775     const CGOpenMPRuntime::StaticRTInput &Values) {
2776   if (!CGF.HaveInsertPoint())
2777     return;
2778 
2779   assert(!Values.Ordered);
2780   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2781          Schedule == OMP_sch_static_balanced_chunked ||
2782          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2783          Schedule == OMP_dist_sch_static ||
2784          Schedule == OMP_dist_sch_static_chunked);
2785 
2786   // Call __kmpc_for_static_init(
2787   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2788   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2789   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2790   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2791   llvm::Value *Chunk = Values.Chunk;
2792   if (Chunk == nullptr) {
2793     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2794             Schedule == OMP_dist_sch_static) &&
2795            "expected static non-chunked schedule");
2796     // If the Chunk was not specified in the clause - use default value 1.
2797     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2798   } else {
2799     assert((Schedule == OMP_sch_static_chunked ||
2800             Schedule == OMP_sch_static_balanced_chunked ||
2801             Schedule == OMP_ord_static_chunked ||
2802             Schedule == OMP_dist_sch_static_chunked) &&
2803            "expected static chunked schedule");
2804   }
2805   llvm::Value *Args[] = {
2806       UpdateLocation,
2807       ThreadId,
2808       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2809                                                   M2)), // Schedule type
2810       Values.IL.getPointer(),                           // &isLastIter
2811       Values.LB.getPointer(),                           // &LB
2812       Values.UB.getPointer(),                           // &UB
2813       Values.ST.getPointer(),                           // &Stride
2814       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2815       Chunk                                             // Chunk
2816   };
2817   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2818 }
2819 
2820 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2821                                         SourceLocation Loc,
2822                                         OpenMPDirectiveKind DKind,
2823                                         const OpenMPScheduleTy &ScheduleKind,
2824                                         const StaticRTInput &Values) {
2825   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2826       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2827   assert(isOpenMPWorksharingDirective(DKind) &&
2828          "Expected loop-based or sections-based directive.");
2829   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2830                                              isOpenMPLoopDirective(DKind)
2831                                                  ? OMP_IDENT_WORK_LOOP
2832                                                  : OMP_IDENT_WORK_SECTIONS);
2833   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2834   llvm::FunctionCallee StaticInitFunction =
2835       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2836   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2837   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2838                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2839 }
2840 
2841 void CGOpenMPRuntime::emitDistributeStaticInit(
2842     CodeGenFunction &CGF, SourceLocation Loc,
2843     OpenMPDistScheduleClauseKind SchedKind,
2844     const CGOpenMPRuntime::StaticRTInput &Values) {
2845   OpenMPSchedType ScheduleNum =
2846       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2847   llvm::Value *UpdatedLocation =
2848       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2849   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2850   llvm::FunctionCallee StaticInitFunction;
2851   bool isGPUDistribute =
2852       CGM.getLangOpts().OpenMPIsDevice &&
2853       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2854   StaticInitFunction = createForStaticInitFunction(
2855       Values.IVSize, Values.IVSigned, isGPUDistribute);
2856 
2857   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2858                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2859                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2860 }
2861 
2862 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2863                                           SourceLocation Loc,
2864                                           OpenMPDirectiveKind DKind) {
2865   if (!CGF.HaveInsertPoint())
2866     return;
2867   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2868   llvm::Value *Args[] = {
2869       emitUpdateLocation(CGF, Loc,
2870                          isOpenMPDistributeDirective(DKind)
2871                              ? OMP_IDENT_WORK_DISTRIBUTE
2872                              : isOpenMPLoopDirective(DKind)
2873                                    ? OMP_IDENT_WORK_LOOP
2874                                    : OMP_IDENT_WORK_SECTIONS),
2875       getThreadID(CGF, Loc)};
2876   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2877   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2878       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2879     CGF.EmitRuntimeCall(
2880         OMPBuilder.getOrCreateRuntimeFunction(
2881             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2882         Args);
2883   else
2884     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2885                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2886                         Args);
2887 }
2888 
2889 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2890                                                  SourceLocation Loc,
2891                                                  unsigned IVSize,
2892                                                  bool IVSigned) {
2893   if (!CGF.HaveInsertPoint())
2894     return;
2895   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2896   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2897   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2898 }
2899 
2900 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2901                                           SourceLocation Loc, unsigned IVSize,
2902                                           bool IVSigned, Address IL,
2903                                           Address LB, Address UB,
2904                                           Address ST) {
2905   // Call __kmpc_dispatch_next(
2906   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2907   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2908   //          kmp_int[32|64] *p_stride);
2909   llvm::Value *Args[] = {
2910       emitUpdateLocation(CGF, Loc),
2911       getThreadID(CGF, Loc),
2912       IL.getPointer(), // &isLastIter
2913       LB.getPointer(), // &Lower
2914       UB.getPointer(), // &Upper
2915       ST.getPointer()  // &Stride
2916   };
2917   llvm::Value *Call =
2918       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2919   return CGF.EmitScalarConversion(
2920       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2921       CGF.getContext().BoolTy, Loc);
2922 }
2923 
2924 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2925                                            llvm::Value *NumThreads,
2926                                            SourceLocation Loc) {
2927   if (!CGF.HaveInsertPoint())
2928     return;
2929   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2930   llvm::Value *Args[] = {
2931       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2932       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2933   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2934                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2935                       Args);
2936 }
2937 
2938 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2939                                          ProcBindKind ProcBind,
2940                                          SourceLocation Loc) {
2941   if (!CGF.HaveInsertPoint())
2942     return;
2943   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2944   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2945   llvm::Value *Args[] = {
2946       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2947       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2948   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2949                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2950                       Args);
2951 }
2952 
2953 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2954                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2955   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2956     OMPBuilder.createFlush(CGF.Builder);
2957   } else {
2958     if (!CGF.HaveInsertPoint())
2959       return;
2960     // Build call void __kmpc_flush(ident_t *loc)
2961     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2962                             CGM.getModule(), OMPRTL___kmpc_flush),
2963                         emitUpdateLocation(CGF, Loc));
2964   }
2965 }
2966 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE: the enumerator order defines the field indices used when accessing
/// the task record; do not reorder.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2992 
2993 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2994   return OffloadEntriesTargetRegion.empty() &&
2995          OffloadEntriesDeviceGlobalVar.empty();
2996 }
2997 
2998 /// Initialize target region entry.
2999 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3000     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3001                                     StringRef ParentName, unsigned LineNum,
3002                                     unsigned Order) {
3003   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3004                                              "only required for the device "
3005                                              "code generation.");
3006   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3007       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3008                                    OMPTargetRegionEntryTargetRegion);
3009   ++OffloadingEntriesNum;
3010 }
3011 
3012 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3013     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3014                                   StringRef ParentName, unsigned LineNum,
3015                                   llvm::Constant *Addr, llvm::Constant *ID,
3016                                   OMPTargetRegionEntryKind Flags) {
3017   // If we are emitting code for a target, the entry is already initialized,
3018   // only has to be registered.
3019   if (CGM.getLangOpts().OpenMPIsDevice) {
3020     // This could happen if the device compilation is invoked standalone.
3021     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3022       return;
3023     auto &Entry =
3024         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3025     Entry.setAddress(Addr);
3026     Entry.setID(ID);
3027     Entry.setFlags(Flags);
3028   } else {
3029     if (Flags ==
3030             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3031         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3032                                  /*IgnoreAddressId*/ true))
3033       return;
3034     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3035            "Target region entry already registered!");
3036     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3037     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3038     ++OffloadingEntriesNum;
3039   }
3040 }
3041 
3042 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3043     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3044     bool IgnoreAddressId) const {
3045   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3046   if (PerDevice == OffloadEntriesTargetRegion.end())
3047     return false;
3048   auto PerFile = PerDevice->second.find(FileID);
3049   if (PerFile == PerDevice->second.end())
3050     return false;
3051   auto PerParentName = PerFile->second.find(ParentName);
3052   if (PerParentName == PerFile->second.end())
3053     return false;
3054   auto PerLine = PerParentName->second.find(LineNum);
3055   if (PerLine == PerParentName->second.end())
3056     return false;
3057   // Fail if this entry is already registered.
3058   if (!IgnoreAddressId &&
3059       (PerLine->second.getAddress() || PerLine->second.getID()))
3060     return false;
3061   return true;
3062 }
3063 
3064 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3065     const OffloadTargetRegionEntryInfoActTy &Action) {
3066   // Scan all target region entries and perform the provided action.
3067   for (const auto &D : OffloadEntriesTargetRegion)
3068     for (const auto &F : D.second)
3069       for (const auto &P : F.second)
3070         for (const auto &L : P.second)
3071           Action(D.first, F.first, P.first(), L.first, L.second);
3072 }
3073 
/// Initialize a device global variable entry (device compilation only);
/// address/size/linkage are filled in later by
/// registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Placeholder entry keyed by the mangled variable name.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3084 
3085 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3086     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3087                                      CharUnits VarSize,
3088                                      OMPTargetGlobalVarEntryKind Flags,
3089                                      llvm::GlobalValue::LinkageTypes Linkage) {
3090   if (CGM.getLangOpts().OpenMPIsDevice) {
3091     // This could happen if the device compilation is invoked standalone.
3092     if (!hasDeviceGlobalVarEntryInfo(VarName))
3093       return;
3094     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3095     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3096       if (Entry.getVarSize().isZero()) {
3097         Entry.setVarSize(VarSize);
3098         Entry.setLinkage(Linkage);
3099       }
3100       return;
3101     }
3102     Entry.setVarSize(VarSize);
3103     Entry.setLinkage(Linkage);
3104     Entry.setAddress(Addr);
3105   } else {
3106     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3107       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3108       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3109              "Entry not initialized!");
3110       if (Entry.getVarSize().isZero()) {
3111         Entry.setVarSize(VarSize);
3112         Entry.setLinkage(Linkage);
3113       }
3114       return;
3115     }
3116     OffloadEntriesDeviceGlobalVar.try_emplace(
3117         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3118     ++OffloadingEntriesNum;
3119   }
3120 }
3121 
/// Invoke \p Action on every registered device global variable entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}
3129 
3130 void CGOpenMPRuntime::createOffloadEntry(
3131     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3132     llvm::GlobalValue::LinkageTypes Linkage) {
3133   StringRef Name = Addr->getName();
3134   llvm::Module &M = CGM.getModule();
3135   llvm::LLVMContext &C = M.getContext();
3136 
3137   // Create constant string with the name.
3138   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3139 
3140   std::string StringName = getName({"omp_offloading", "entry_name"});
3141   auto *Str = new llvm::GlobalVariable(
3142       M, StrPtrInit->getType(), /*isConstant=*/true,
3143       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3144   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3145 
3146   llvm::Constant *Data[] = {
3147       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3148       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3149       llvm::ConstantInt::get(CGM.SizeTy, Size),
3150       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3151       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3152   std::string EntryName = getName({"omp_offloading", "entry", ""});
3153   llvm::GlobalVariable *Entry = createGlobalStruct(
3154       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3155       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3156 
3157   // The entry has to be created in the section the linker expects it to be.
3158   Entry->setSection("omp_offloading_entries");
3159 }
3160 
/// Emit the __tgt_offload_entry descriptors and the !omp_offload.info named
/// metadata describing all registered target regions and declare-target
/// variables, and diagnose entries left incomplete.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by creation order so host and device compilations agree
  // on numbering; each tuple holds (entry, source location, mangled name).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by matching the
        // device/file IDs against the files known to the source manager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now walk the entries in creation order, diagnosing incomplete ones and
  // emitting a __tgt_offload_entry descriptor for each valid one.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3334 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Read operand Idx as an integer constant.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Read operand Idx as a string.
    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operands depend on it
    // (see the emitters in createOffloadEntriesAndInfoMetadata()).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3403 
3404 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3405   if (!KmpRoutineEntryPtrTy) {
3406     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3407     ASTContext &C = CGM.getContext();
3408     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3409     FunctionProtoType::ExtProtoInfo EPI;
3410     KmpRoutineEntryPtrQTy = C.getPointerType(
3411         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3412     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3413   }
3414 }
3415 
3416 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3417   // Make sure the type of the entry is already created. This is the type we
3418   // have to create:
3419   // struct __tgt_offload_entry{
3420   //   void      *addr;       // Pointer to the offload entry info.
3421   //                          // (function or global)
3422   //   char      *name;       // Name of the function or global.
3423   //   size_t     size;       // Size of the entry info (0 if it a function).
3424   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3425   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3426   // };
3427   if (TgtOffloadEntryQTy.isNull()) {
3428     ASTContext &C = CGM.getContext();
3429     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3430     RD->startDefinition();
3431     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3432     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3433     addFieldToRecordDecl(C, RD, C.getSizeType());
3434     addFieldToRecordDecl(
3435         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3436     addFieldToRecordDecl(
3437         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3438     RD->completeDefinition();
3439     RD->addAttr(PackedAttr::CreateImplicit(C));
3440     TgtOffloadEntryQTy = C.getRecordType(RD);
3441   }
3442   return TgtOffloadEntryQTy;
3443 }
3444 
3445 namespace {
3446 struct PrivateHelpersTy {
3447   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3448                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3449       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3450         PrivateElemInit(PrivateElemInit) {}
3451   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3452   const Expr *OriginalRef = nullptr;
3453   const VarDecl *Original = nullptr;
3454   const VarDecl *PrivateCopy = nullptr;
3455   const VarDecl *PrivateElemInit = nullptr;
3456   bool isLocalPrivate() const {
3457     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3458   }
3459 };
3460 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3461 } // anonymous namespace
3462 
3463 static bool isAllocatableDecl(const VarDecl *VD) {
3464   const VarDecl *CVD = VD->getCanonicalDecl();
3465   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3466     return false;
3467   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3468   // Use the default allocation.
3469   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3470             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3471            !AA->getAllocator());
3472 }
3473 
3474 static RecordDecl *
3475 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3476   if (!Privates.empty()) {
3477     ASTContext &C = CGM.getContext();
3478     // Build struct .kmp_privates_t. {
3479     //         /*  private vars  */
3480     //       };
3481     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3482     RD->startDefinition();
3483     for (const auto &Pair : Privates) {
3484       const VarDecl *VD = Pair.second.Original;
3485       QualType Type = VD->getType().getNonReferenceType();
3486       // If the private variable is a local variable with lvalue ref type,
3487       // allocate the pointer instead of the pointee type.
3488       if (Pair.second.isLocalPrivate()) {
3489         if (VD->getType()->isLValueReferenceType())
3490           Type = C.getPointerType(Type);
3491         if (isAllocatableDecl(VD))
3492           Type = C.getPointerType(Type);
3493       }
3494       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3495       if (VD->hasAttrs()) {
3496         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3497              E(VD->getAttrs().end());
3498              I != E; ++I)
3499           FD->addAttr(*I);
3500       }
3501     }
3502     RD->completeDefinition();
3503     return RD;
3504   }
3505   return nullptr;
3506 }
3507 
3508 static RecordDecl *
3509 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3510                          QualType KmpInt32Ty,
3511                          QualType KmpRoutineEntryPointerQTy) {
3512   ASTContext &C = CGM.getContext();
3513   // Build struct kmp_task_t {
3514   //         void *              shareds;
3515   //         kmp_routine_entry_t routine;
3516   //         kmp_int32           part_id;
3517   //         kmp_cmplrdata_t data1;
3518   //         kmp_cmplrdata_t data2;
3519   // For taskloops additional fields:
3520   //         kmp_uint64          lb;
3521   //         kmp_uint64          ub;
3522   //         kmp_int64           st;
3523   //         kmp_int32           liter;
3524   //         void *              reductions;
3525   //       };
3526   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3527   UD->startDefinition();
3528   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3529   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3530   UD->completeDefinition();
3531   QualType KmpCmplrdataTy = C.getRecordType(UD);
3532   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3533   RD->startDefinition();
3534   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3535   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3536   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3537   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3538   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3539   if (isOpenMPTaskLoopDirective(Kind)) {
3540     QualType KmpUInt64Ty =
3541         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3542     QualType KmpInt64Ty =
3543         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3544     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3545     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3546     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3547     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3548     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3549   }
3550   RD->completeDefinition();
3551   return RD;
3552 }
3553 
3554 static RecordDecl *
3555 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3556                                      ArrayRef<PrivateDataTy> Privates) {
3557   ASTContext &C = CGM.getContext();
3558   // Build struct kmp_task_t_with_privates {
3559   //         kmp_task_t task_data;
3560   //         .kmp_privates_t. privates;
3561   //       };
3562   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3563   RD->startDefinition();
3564   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3565   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3566     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3567   RD->completeDefinition();
3568   return RD;
3569 }
3570 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// The generated function has internal linkage, is marked non-recursive, and
/// forwards the fields of the kmp_task_t_with_privates argument to
/// \p TaskFunction; for taskloop directives the loop bounds, stride,
/// last-iteration flag and reductions pointer are forwarded as well.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base points at the embedded kmp_task_t (first field of the wrapper).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load tt->shareds and cast it to the expected shareds pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates if the wrapper has a privates field, else a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to task and taskloop entry points.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb, ub, st, liter and reductions, loaded from
  // the corresponding kmp_task_t fields.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3685 
/// Emit a function that runs the destructors of the privates embedded in a
/// kmp_task_t_with_privates instance:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
///   // destroy each field of tt->privates that has a destruction kind
/// }
/// \endcode
/// The generated function has internal linkage and is marked non-recursive.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Same (gtid, task*) signature as the task entry proxy.
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every private field that needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3734 
3735 /// Emit a privates mapping function for correct handling of private and
3736 /// firstprivate variables.
3737 /// \code
3738 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3739 /// **noalias priv1,...,  <tyn> **noalias privn) {
3740 ///   *priv1 = &.privates.priv1;
3741 ///   ...;
3742 ///   *privn = &.privates.privn;
3743 /// }
3744 /// \endcode
3745 static llvm::Value *
3746 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3747                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3748                                ArrayRef<PrivateDataTy> Privates) {
3749   ASTContext &C = CGM.getContext();
3750   FunctionArgList Args;
3751   ImplicitParamDecl TaskPrivatesArg(
3752       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3753       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3754       ImplicitParamDecl::Other);
3755   Args.push_back(&TaskPrivatesArg);
3756   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3757   unsigned Counter = 1;
3758   for (const Expr *E : Data.PrivateVars) {
3759     Args.push_back(ImplicitParamDecl::Create(
3760         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3761         C.getPointerType(C.getPointerType(E->getType()))
3762             .withConst()
3763             .withRestrict(),
3764         ImplicitParamDecl::Other));
3765     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3766     PrivateVarsPos[VD] = Counter;
3767     ++Counter;
3768   }
3769   for (const Expr *E : Data.FirstprivateVars) {
3770     Args.push_back(ImplicitParamDecl::Create(
3771         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3772         C.getPointerType(C.getPointerType(E->getType()))
3773             .withConst()
3774             .withRestrict(),
3775         ImplicitParamDecl::Other));
3776     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3777     PrivateVarsPos[VD] = Counter;
3778     ++Counter;
3779   }
3780   for (const Expr *E : Data.LastprivateVars) {
3781     Args.push_back(ImplicitParamDecl::Create(
3782         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3783         C.getPointerType(C.getPointerType(E->getType()))
3784             .withConst()
3785             .withRestrict(),
3786         ImplicitParamDecl::Other));
3787     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3788     PrivateVarsPos[VD] = Counter;
3789     ++Counter;
3790   }
3791   for (const VarDecl *VD : Data.PrivateLocals) {
3792     QualType Ty = VD->getType().getNonReferenceType();
3793     if (VD->getType()->isLValueReferenceType())
3794       Ty = C.getPointerType(Ty);
3795     if (isAllocatableDecl(VD))
3796       Ty = C.getPointerType(Ty);
3797     Args.push_back(ImplicitParamDecl::Create(
3798         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3799         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3800         ImplicitParamDecl::Other));
3801     PrivateVarsPos[VD] = Counter;
3802     ++Counter;
3803   }
3804   const auto &TaskPrivatesMapFnInfo =
3805       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3806   llvm::FunctionType *TaskPrivatesMapTy =
3807       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3808   std::string Name =
3809       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3810   auto *TaskPrivatesMap = llvm::Function::Create(
3811       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3812       &CGM.getModule());
3813   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3814                                     TaskPrivatesMapFnInfo);
3815   if (CGM.getLangOpts().Optimize) {
3816     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3817     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3818     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3819   }
3820   CodeGenFunction CGF(CGM);
3821   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3822                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3823 
3824   // *privi = &.privates.privi;
3825   LValue Base = CGF.EmitLoadOfPointerLValue(
3826       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3827       TaskPrivatesArg.getType()->castAs<PointerType>());
3828   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3829   Counter = 0;
3830   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3831     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3832     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3833     LValue RefLVal =
3834         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3835     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3836         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3837     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3838     ++Counter;
3839   }
3840   CGF.FinishFunction();
3841   return TaskPrivatesMap;
3842 }
3843 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block, used as the copy
///        source for firstprivate initialization.
/// \param TDBase LValue of the kmp_task_t_with_privates instance.
/// \param ForDup True when emitting inside the task duplication function;
///        then only non-trivial construct-expressions are (re-)emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // SrcBase views the shareds block through the shareds record type.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function, only non-trivial CXXConstructExpr initializers
    // must be re-run; trivial ones are handled by the plain copy.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: copy/construct from the shared original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function the source lives in the shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind the element-init variable to the source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: bind the source and run the init expr.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the declared initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3965 
3966 /// Check if duplication function is required for taskloops.
3967 static bool checkInitIsRequired(CodeGenFunction &CGF,
3968                                 ArrayRef<PrivateDataTy> Privates) {
3969   bool InitRequired = false;
3970   for (const PrivateDataTy &Pair : Privates) {
3971     if (Pair.second.isLocalPrivate())
3972       continue;
3973     const VarDecl *VD = Pair.second.PrivateCopy;
3974     const Expr *Init = VD->getAnyInitializer();
3975     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3976                                     !CGF.isTrivialInitializer(Init));
3977     if (InitRequired)
3978       break;
3979   }
3980   return InitRequired;
3981 }
3982 
3983 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// The generated function has internal linkage and is marked non-recursive.
/// \param WithLastIter When true, the lastpriv argument is stored into the
///        destination task's liter field.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters: (kmp_task_t *task_dst, kmp_task_t *task_src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the source task's shareds block.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4062 
4063 /// Checks if destructor function is required to be generated.
4064 /// \return true if cleanups are required, false otherwise.
4065 static bool
4066 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4067                          ArrayRef<PrivateDataTy> Privates) {
4068   for (const PrivateDataTy &P : Privates) {
4069     if (P.second.isLocalPrivate())
4070       continue;
4071     QualType Ty = P.second.Original->getType().getNonReferenceType();
4072     if (Ty.isDestructedType())
4073       return true;
4074   }
4075   return false;
4076 }
4077 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor privatizes the iterator variables and their helper
/// counters and emits one loop header per iterator (counter = 0; bounds
/// check; branch into the body), leaving the IR insertion point inside the
/// innermost loop body.  The destructor emits the matching latches (counter
/// update, branch back to the check) and the exit blocks in reverse order,
/// closing the loop nest.  Code emitted between construction and destruction
/// therefore runs once per iteration of the full iterator space.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation ("cont") and exit jump destinations, indexed
  // by iterator number; filled by the constructor, consumed by the dtor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate every upper bound up front, then create private storage for
    // each iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the loop headers, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Compare with the signedness of the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Close the loops in reverse order, innermost iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4156 
4157 static std::pair<llvm::Value *, llvm::Value *>
4158 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4159   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4160   llvm::Value *Addr;
4161   if (OASE) {
4162     const Expr *Base = OASE->getBase();
4163     Addr = CGF.EmitScalarExpr(Base);
4164   } else {
4165     Addr = CGF.EmitLValue(E).getPointer(CGF);
4166   }
4167   llvm::Value *SizeVal;
4168   QualType Ty = E->getType();
4169   if (OASE) {
4170     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4171     for (const Expr *SE : OASE->getDimensions()) {
4172       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4173       Sz = CGF.EmitScalarConversion(
4174           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4175       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4176     }
4177   } else if (const auto *ASE =
4178                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4179     LValue UpAddrLVal =
4180         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4181     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4182     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4183         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4184     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4185     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4186     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4187   } else {
4188     SizeVal = CGF.getTypeSize(Ty);
4189   }
4190   return std::make_pair(Addr, SizeVal);
4191 }
4192 
4193 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4194 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4195   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4196   if (KmpTaskAffinityInfoTy.isNull()) {
4197     RecordDecl *KmpAffinityInfoRD =
4198         C.buildImplicitRecord("kmp_task_affinity_info_t");
4199     KmpAffinityInfoRD->startDefinition();
4200     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4201     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4202     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4203     KmpAffinityInfoRD->completeDefinition();
4204     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4205   }
4206 }
4207 
/// Creates and initializes a task descriptor for a task-generating
/// directive \p D: aggregates the private/firstprivate/lastprivate copies,
/// builds the kmp_task_t-with-privates record, emits the proxy task entry
/// around \p TaskFunction, allocates the task via the runtime, then fills in
/// shareds, private copies, detach/affinity data, destructors and priority.
/// \param SharedsTy Type of the captured-shareds record; \p Shareds is its
///        address in the generating function.
/// \param Data Clause data (privates, final, priority, tiedness, ...).
/// \return The allocated task, its entry point and layout info (TaskResultTy).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the init expression's variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable ones are stored via a pointer.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment; stable so entries with equal alignment
  // keep their source order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop and plain task/target directives use distinct cached record
  // types (taskloop's kmp_task_t carries extra loop fields).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates map function matches the type of TaskFunction's 4th
  // parameter; a null pointer of that type is passed when there are none.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final clause may be a runtime expression (select on its value) or a
  // compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    // nowait tasks use the target-task allocation entry point, which takes
    // the extra device ID argument.
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a static count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized array: total = static count + runtime count.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Statically-sized array of NumAffinities elements.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-generated elements need a runtime position counter, seeded
    // with the number of statically emitted elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // Cast the opaque task pointer returned by the runtime to the
  // task-with-privates record type before filling it in.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-chunk copies of lastprivates/inited privates
    // also get a task duplication callback.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4595 
namespace {
/// Dependence kind for RTL.
/// These values are stored verbatim into the 'flags' field of the
/// kmp_depend_info record (see emitDependData), so they must match the flag
/// encoding the OpenMP runtime expects.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record, in declaration order:
/// base address, length in bytes, dependence flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4606 
4607 /// Translates internal dependency kind into the runtime kind.
4608 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4609   RTLDependenceKindTy DepKind;
4610   switch (K) {
4611   case OMPC_DEPEND_in:
4612     DepKind = DepIn;
4613     break;
4614   // Out and InOut dependencies must use the same code.
4615   case OMPC_DEPEND_out:
4616   case OMPC_DEPEND_inout:
4617     DepKind = DepInOut;
4618     break;
4619   case OMPC_DEPEND_mutexinoutset:
4620     DepKind = DepMutexInOutSet;
4621     break;
4622   case OMPC_DEPEND_source:
4623   case OMPC_DEPEND_sink:
4624   case OMPC_DEPEND_depobj:
4625   case OMPC_DEPEND_unknown:
4626     llvm_unreachable("Unknown task dependence type");
4627   }
4628   return DepKind;
4629 }
4630 
4631 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4632 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4633                            QualType &FlagsTy) {
4634   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4635   if (KmpDependInfoTy.isNull()) {
4636     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4637     KmpDependInfoRD->startDefinition();
4638     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4639     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4640     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4641     KmpDependInfoRD->completeDefinition();
4642     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4643   }
4644 }
4645 
/// Returns the number of dependencies stored in a depobj and an LValue for
/// the first kmp_depend_info element of its array.  The element count is
/// read from the base_addr field of the array element at index -1, i.e. the
/// slot immediately preceding the first dependency record.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj holds a void* pointing at the dependency array; load it and
  // reinterpret it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the size-carrying slot.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4672 
/// Emits one kmp_depend_info record into \p DependenciesArray for every
/// dependence expression in \p Data, starting at position \p Pos.  \p Pos is
/// either a compile-time counter (unsigned*) or a runtime counter stored in
/// memory (LValue*); it is advanced by one after each emitted element.  If
/// the clause has an iterator modifier, the emission is wrapped in the
/// iterator loop nest so a record is produced per iteration.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Opens the iterator loops (no-op when there is no iterator modifier);
  // the scope's destructor closes them after the loop below.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    // Address the target slot either by constant index or by loading the
    // runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position counter, whichever form it takes.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4729 
/// Returns, for every depobj dependency expression in \p Data, an IR value
/// holding the number of kmp_depend_info records stored in that depobj.
/// The runtime keeps the record count in the base_addr field of the element
/// that immediately precedes the depobj's dependency array (i.e. deps[-1]).
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Iterator variables are only live while the sizes are computed; the
    // scope is closed before the temporaries are read back below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the depobj handle and cast it to kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives one record before the array start.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Stash the count in a temporary so it can be read after the iterator
      // scope is destroyed.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the sizes back from the temporaries, outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4785 
/// Copies the kmp_depend_info records stored in each depobj dependency of
/// \p Data into \p DependenciesArray via memcpy, starting at the runtime
/// position held in \p PosLVal and advancing that position by the number of
/// records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of a single kmp_depend_info record, used to scale the memcpy.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Materialize iterator variables (if any) for the scope of the copies.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj handle and cast it to kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj; the count is stored in
      // the base_addr field of the record preceding the array (deps[-1]).
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: NumDeps records of ElSize bytes each, into
      // the destination array at the current runtime position.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4842 
/// Builds the kmp_depend_info array for the given list of depend clauses and
/// returns (number of elements as i32, array address cast to void*). Returns
/// (nullptr, invalid) when there are no dependency expressions at all.
/// Regular deps without iterators are emitted first, then iterator-driven
/// regular deps, and finally the contents of any depobj deps are memcpy'ed in.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: only regular deps without iterators contribute here;
  // depobj and iterator-driven deps are counted at runtime below.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Each iterator multiplies the clause's dep count by its trip count.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: emit a VLA of kmp_depend_info
    // sized by the sum of the compile-time and runtime counts.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Bind the runtime count to an opaque expression so it can serve as the
    // VLA size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: use a plain constant-sized local array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First: regular dependencies without iterators, tracked by a compile-time
  // position counter.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators, switching to a runtime
  // position counter seeded with the compile-time position reached above.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4966 
/// Emits the dependency array backing an 'omp depobj' construct. The array
/// is heap-allocated via __kmpc_alloc with one extra leading element whose
/// base_addr field stores the number of dependencies (needed later by
/// depobj(x) update(...)/destroy). Returns the address of the first real
/// dependency record, i.e. one element past the count record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime element count: product of all iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading count record, then scale to bytes.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Compile-time element count (+1 extra slot for the count record).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the real records starting at index 1 (index 0 holds the count),
  // using a runtime counter only when iterators make the count dynamic.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the count record, cast to void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5049 
/// Emits code for 'depobj(x) destroy': frees the heap allocation backing the
/// depobj via __kmpc_free. The pointer stored in the depobj points one
/// record past the allocation start (the leading count record), so the
/// address passed to the runtime is deps[-1].
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the depobj handle and cast it to kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back one record to recover the original allocation start.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5076 
/// Emits code for 'depobj(x) update(kind)': loops over every kmp_depend_info
/// record stored in the depobj and rewrites its flags field to \p NewDepKind.
/// The element count comes from getDepobjElements (stored at deps[-1]).
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // Current element pointer: starts at Begin, advanced at the loop bottom.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5123 
/// Emits code for the 'task' directive: allocates/initializes the task via
/// emitTaskInit, builds the dependency array (if any), then either enqueues
/// the task (__kmpc_omp_task[_with_deps]) or, under a false 'if' clause,
/// waits on the dependencies and runs the task body inline between
/// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch ('if' clause true or absent): enqueue the task with the
  // runtime, with or without dependencies.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For untied tasks, reset part_id so execution starts at part 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-branch ('if' clause false): wait on dependencies, then run the task
  // entry inline as an undeferred task.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5241 
/// Emits code for the 'taskloop' directive: allocates/initializes the task
/// via emitTaskInit, fills in the lower bound, upper bound, stride and
/// reductions fields of the task record, and calls __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if' clause is passed to the runtime as an int, not branched on here.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lb field from the directive's lower-bound variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's ub field from the directive's upper-bound variable.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's st field from the directive's stride variable.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling kind argument values expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5327 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Optional 'x' expression forwarded unchanged to \p RedOpGen
/// (used when emitting atomic reductions).
/// \param EExpr Optional 'expr' expression forwarded unchanged to \p RedOpGen.
/// \param UpExpr Optional update expression forwarded unchanged to
/// \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array has zero elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // loop iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated combiner operates on single array elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The combiner may have emitted new blocks, so the back-edge comes from the
  // current insert block rather than BodyBB itself.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5410 
5411 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5412 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5413 /// UDR combiner function.
5414 static void emitReductionCombiner(CodeGenFunction &CGF,
5415                                   const Expr *ReductionOp) {
5416   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5417     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5418       if (const auto *DRE =
5419               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5420         if (const auto *DRD =
5421                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5422           std::pair<llvm::Function *, llvm::Function *> Reduction =
5423               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5424           RValue Func = RValue::get(Reduction.first);
5425           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5426           CGF.EmitIgnoredExpr(ReductionOp);
5427           return;
5428         }
5429   CGF.EmitIgnoredExpr(ReductionOp);
5430 }
5431 
/// Emits the outlined reduction function passed to __kmpc_reduce{_nowait}:
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg) {
///   // Both args point to arrays of void* referencing the reduction items
///   // (with trailing size slots for variably-modified items).
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
/// }
/// \endcode
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap the LHS/RHS helper variables to the corresponding array slots so
  // the combiner expressions below operate on the passed-in items.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The slot after the item pointer holds the number of elements; bind it
      // to the VLA size expression before emitting the VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5523 
5524 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5525                                                   const Expr *ReductionOp,
5526                                                   const Expr *PrivateRef,
5527                                                   const DeclRefExpr *LHS,
5528                                                   const DeclRefExpr *RHS) {
5529   if (PrivateRef->getType()->isArrayType()) {
5530     // Emit reduction for array section.
5531     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5532     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5533     EmitOMPAggregateReduction(
5534         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5535         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5536           emitReductionCombiner(CGF, ReductionOp);
5537         });
5538   } else {
5539     // Emit reduction for array subscript or single variable.
5540     emitReductionCombiner(CGF, ReductionOp);
5541   }
5542 }
5543 
/// Emits finalization code for a reduction clause: builds the list of
/// reduction items, the outlined reduce_func, and the call to
/// __kmpc_reduce{_nowait}, then emits the tree-reduce (case 1) and atomic
/// (case 2) arms of the resulting switch. With Options.SimpleReduction only
/// the plain combiners are emitted, with no runtime calls.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime interaction: just run each combiner in a cleanup scope.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic path: run every combiner; the CommonActionTy below appends the
  // matching __kmpc_end_reduce{_nowait} call.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Atomic path: try to emit each combiner as a simple atomic update; fall
  // back to a critical region when the expression shape does not allow it.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Evaluate the update expression against a temporary holding
                // the loaded value of X.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5850 
5851 /// Generates unique name for artificial threadprivate variables.
5852 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5853 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5854                                       const Expr *Ref) {
5855   SmallString<256> Buffer;
5856   llvm::raw_svector_ostream Out(Buffer);
5857   const clang::DeclRefExpr *DE;
5858   const VarDecl *D = ::getBaseDecl(Ref, DE);
5859   if (!D)
5860     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5861   D = D->getCanonicalDecl();
5862   std::string Name = CGM.getOpenMPRuntime().getName(
5863       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5864   Out << Prefix << Name << "_"
5865       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5866   return std::string(Out.str());
5867 }
5868 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction code generation helper describing the items.
/// \param N Index of the reduction item handled by this function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5932 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction code generation helper describing the items.
/// \param N Index of the reduction item handled by this function.
/// \param ReductionOp Combiner expression to emit.
/// \param LHS Reference to the in/out (accumulator) helper variable.
/// \param RHS Reference to the in (contribution) helper variable.
/// \param PrivateRef Reference to the private copy of the reduction item.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6010 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr if reduction item \p N needs no cleanups, so the caller
/// can pass a null pointer to the runtime instead.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is required for items without cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single void* parameter: the private copy of the reduction item.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = bitcast void* %arg to <type>*
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6059 
/// Emits the initialization of task reductions: builds an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying
/// the shared/original addresses, the item size, and pointers to the
/// generated init/fini/comb helper functions) and passes it to
/// __kmpc_taskred_init or, for reductions with a task modifier, to
/// __kmpc_taskred_modifier_init. Returns the taskgroup data pointer produced
/// by the runtime, or nullptr if there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size-in-chars;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null fini pointer tells the runtime no finalization is needed.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 presumably marks items needing delayed creation
      // (VLAs/array sections, see comment above) — matches kmp_task_red_flags_t
      // in the runtime; verify against the runtime headers.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6188 
6189 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6190                                             SourceLocation Loc,
6191                                             bool IsWorksharingReduction) {
6192   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6193   // is_ws, int num, void *data);
6194   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6195   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6196                                                 CGM.IntTy, /*isSigned=*/true);
6197   llvm::Value *Args[] = {IdentTLoc, GTid,
6198                          llvm::ConstantInt::get(CGM.IntTy,
6199                                                 IsWorksharingReduction ? 1 : 0,
6200                                                 /*isSigned=*/true)};
6201   (void)CGF.EmitRuntimeCall(
6202       OMPBuilder.getOrCreateRuntimeFunction(
6203           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6204       Args);
6205 }
6206 
6207 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6208                                               SourceLocation Loc,
6209                                               ReductionCodeGen &RCG,
6210                                               unsigned N) {
6211   auto Sizes = RCG.getSizes(N);
6212   // Emit threadprivate global variable if the type is non-constant
6213   // (Sizes.second = nullptr).
6214   if (Sizes.second) {
6215     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6216                                                      /*isSigned=*/false);
6217     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6218         CGF, CGM.getContext().getSizeType(),
6219         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6220     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6221   }
6222 }
6223 
6224 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6225                                               SourceLocation Loc,
6226                                               llvm::Value *ReductionsPtr,
6227                                               LValue SharedLVal) {
6228   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6229   // *d);
6230   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6231                                                    CGM.IntTy,
6232                                                    /*isSigned=*/true),
6233                          ReductionsPtr,
6234                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6235                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6236   return Address(
6237       CGF.EmitRuntimeCall(
6238           OMPBuilder.getOrCreateRuntimeFunction(
6239               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6240           Args),
6241       SharedLVal.getAlignment());
6242 }
6243 
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Use the OpenMPIRBuilder path only when there are no dependences.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the depend clause items (if any) into an array consumable
    // by the runtime.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependence list is passed (count 0, null pointer).
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // When inside an OpenMP region, emit the untied-task switch point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6294 
6295 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6296                                            OpenMPDirectiveKind InnerKind,
6297                                            const RegionCodeGenTy &CodeGen,
6298                                            bool HasCancel) {
6299   if (!CGF.HaveInsertPoint())
6300     return;
6301   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6302                                  InnerKind != OMPD_critical &&
6303                                      InnerKind != OMPD_master &&
6304                                      InnerKind != OMPD_masked);
6305   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6306 }
6307 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,      // no cancellation requested
  CancelParallel = 1,   // cancel a 'parallel' region
  CancelLoop = 2,       // cancel a worksharing loop ('for')
  CancelSections = 3,   // cancel a 'sections' region
  CancelTaskgroup = 4   // cancel a 'taskgroup' region
};
} // anonymous namespace
6317 
6318 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6319   RTCancelKind CancelKind = CancelNoreq;
6320   if (CancelRegion == OMPD_parallel)
6321     CancelKind = CancelParallel;
6322   else if (CancelRegion == OMPD_for)
6323     CancelKind = CancelLoop;
6324   else if (CancelRegion == OMPD_sections)
6325     CancelKind = CancelSections;
6326   else {
6327     assert(CancelRegion == OMPD_taskgroup);
6328     CancelKind = CancelTaskgroup;
6329   }
6330   return CancelKind;
6331 }
6332 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // Branch on the runtime's answer:
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Cancelled parallel regions synchronize via a cancel barrier first.
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6372 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The cancel itself is emitted from this lambda so it can be wrapped in
    // an 'if' clause check when one is present.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // Branch on the runtime's answer:
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Cancelled parallel regions synchronize via a cancel barrier first.
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an 'if' clause, cancel only when the condition is true; the
      // else-branch emits nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6418 
namespace {
/// Cleanup action for uses_allocators support.
///
/// On entry to the region each (allocator, traits) pair is initialized via
/// emitUsesAllocatorsInit (which calls __kmpc_init_allocator); on exit each
/// allocator is destroyed via emitUsesAllocatorsFini
/// (__kmpc_destroy_allocator).
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expr, allocator-traits expr) pairs from the clause.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Initialize all listed allocators before the region body runs.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Destroy all listed allocators after the region body runs.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6446 
6447 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6448     const OMPExecutableDirective &D, StringRef ParentName,
6449     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6450     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6451   assert(!ParentName.empty() && "Invalid target region parent name!");
6452   HasEmittedTargetRegion = true;
6453   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6454   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6455     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6456       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6457       if (!D.AllocatorTraits)
6458         continue;
6459       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6460     }
6461   }
6462   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6463   CodeGen.setAction(UsesAllocatorAction);
6464   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6465                                    IsOffloadEntry, CodeGen);
6466 }
6467 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The trait count is the extent of the constant traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Re-view the traits array address as a void** so it can be loaded as the
  // opaque pointer argument the runtime call expects.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  // allocator = __kmpc_init_allocator(gtid, memspace, ntraits, traits);
  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator: emit the allocator variable itself, then convert the
  // void* runtime result to the declared allocator type and store it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6502 
6503 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6504                                              const Expr *Allocator) {
6505   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6506   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6507   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6508   llvm::Value *AllocatorVal =
6509       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6510   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6511                                           CGF.getContext().VoidPtrTy,
6512                                           Allocator->getExprLoc());
6513   (void)CGF.EmitRuntimeCall(
6514       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6515                                             OMPRTL___kmpc_destroy_allocator),
6516       {ThreadId, AllocatorVal});
6517 }
6518 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target-region statement into EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host, the ID is a unique constant global, not the function.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6603 
6604 /// Checks if the expression is constant or does not have non-trivial function
6605 /// calls.
6606 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6607   // We can skip constant expressions.
6608   // We can skip expressions with trivial calls or simple expressions.
6609   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6610           !E->hasNonTrivialCall(Ctx)) &&
6611          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6612 }
6613 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Descend through nested compound statements looking for exactly one
  // "meaningful" child; returns nullptr as soon as more than one is found.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constant, or without non-trivial calls and side
      // effects) do not count as children.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations: a DeclStmt is ignorable only when every decl in
      // it is one of the listed "inert" kinds, or an unused/global variable.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Recurse into the single child, in case it is itself a compound stmt.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6655 
/// Returns the expression that provides the number of teams for the given
/// target-based directive, or null when no such expression applies.
/// \p DefaultVal is set to a known constant when one can be computed: the
/// folded clause value, 1 when a single team is implied, 0 for the
/// runtime-chosen default, or -1 when no teams region needs to be emitted.
/// \p DefaultVal is left untouched when the clause expression does not fold
/// to a constant.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' carries no num_teams clause itself; inspect the single
    // nested directive of its captured body, if there is one.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          // Record the constant value if the clause expression folds to one.
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams directive without num_teams: runtime default (0).
        DefaultVal = 0;
        return nullptr;
      }
      // Any other nested directive (parallel, simd, or otherwise) implies a
      // single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  // Combined target+teams directives: the num_teams clause, if present, sits
  // directly on this directive.
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      // Record the constant value if the clause expression folds to one.
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  // target+parallel/simd combinations execute with exactly one team.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // Non-target directives are rejected by the assert above; the explicit list
  // keeps -Wswitch coverage when new directive kinds are added.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6784 
6785 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6786     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6787   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6788          "Clauses associated with the teams directive expected to be emitted "
6789          "only for the host!");
6790   CGBuilderTy &Bld = CGF.Builder;
6791   int32_t DefaultNT = -1;
6792   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6793   if (NumTeams != nullptr) {
6794     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6795 
6796     switch (DirectiveKind) {
6797     case OMPD_target: {
6798       const auto *CS = D.getInnermostCapturedStmt();
6799       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6800       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6801       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6802                                                   /*IgnoreResultAssign*/ true);
6803       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6804                              /*isSigned=*/true);
6805     }
6806     case OMPD_target_teams:
6807     case OMPD_target_teams_distribute:
6808     case OMPD_target_teams_distribute_simd:
6809     case OMPD_target_teams_distribute_parallel_for:
6810     case OMPD_target_teams_distribute_parallel_for_simd: {
6811       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6812       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6813                                                   /*IgnoreResultAssign*/ true);
6814       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6815                              /*isSigned=*/true);
6816     }
6817     default:
6818       break;
6819     }
6820   } else if (DefaultNT == -1) {
6821     return nullptr;
6822   }
6823 
6824   return Bld.getInt32(DefaultNT);
6825 }
6826 
6827 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6828                                   llvm::Value *DefaultThreadLimitVal) {
6829   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6830       CGF.getContext(), CS->getCapturedStmt());
6831   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6832     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6833       llvm::Value *NumThreads = nullptr;
6834       llvm::Value *CondVal = nullptr;
6835       // Handle if clause. If if clause present, the number of threads is
6836       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6837       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6838         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6839         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6840         const OMPIfClause *IfClause = nullptr;
6841         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6842           if (C->getNameModifier() == OMPD_unknown ||
6843               C->getNameModifier() == OMPD_parallel) {
6844             IfClause = C;
6845             break;
6846           }
6847         }
6848         if (IfClause) {
6849           const Expr *Cond = IfClause->getCondition();
6850           bool Result;
6851           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6852             if (!Result)
6853               return CGF.Builder.getInt32(1);
6854           } else {
6855             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6856             if (const auto *PreInit =
6857                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6858               for (const auto *I : PreInit->decls()) {
6859                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6860                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6861                 } else {
6862                   CodeGenFunction::AutoVarEmission Emission =
6863                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6864                   CGF.EmitAutoVarCleanups(Emission);
6865                 }
6866               }
6867             }
6868             CondVal = CGF.EvaluateExprAsBool(Cond);
6869           }
6870         }
6871       }
6872       // Check the value of num_threads clause iff if clause was not specified
6873       // or is not evaluated to false.
6874       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6875         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6876         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6877         const auto *NumThreadsClause =
6878             Dir->getSingleClause<OMPNumThreadsClause>();
6879         CodeGenFunction::LexicalScope Scope(
6880             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6881         if (const auto *PreInit =
6882                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6883           for (const auto *I : PreInit->decls()) {
6884             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6885               CGF.EmitVarDecl(cast<VarDecl>(*I));
6886             } else {
6887               CodeGenFunction::AutoVarEmission Emission =
6888                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6889               CGF.EmitAutoVarCleanups(Emission);
6890             }
6891           }
6892         }
6893         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6894         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6895                                                /*isSigned=*/false);
6896         if (DefaultThreadLimitVal)
6897           NumThreads = CGF.Builder.CreateSelect(
6898               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6899               DefaultThreadLimitVal, NumThreads);
6900       } else {
6901         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6902                                            : CGF.Builder.getInt32(0);
6903       }
6904       // Process condition of the if clause.
6905       if (CondVal) {
6906         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6907                                               CGF.Builder.getInt32(1));
6908       }
6909       return NumThreads;
6910     }
6911     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6912       return CGF.Builder.getInt32(1);
6913     return DefaultThreadLimitVal;
6914   }
6915   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6916                                : CGF.Builder.getInt32(0);
6917 }
6918 
/// Returns the expression providing an upper bound on the number of threads
/// for the given target-based directive (the thread_limit clause, possibly
/// replaced by a constant num_threads that is smaller), or null when no such
/// expression applies. \p DefaultVal receives the constant bound when it can
/// be computed; it is left untouched otherwise.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      // Record the constant value if the clause expression folds to one.
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Both thread_limit and num_threads may constrain the thread count; a
    // constant num_threads smaller than the current DefaultVal becomes the
    // effective bound.
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): this compares against the caller-provided DefaultVal
          // even when no thread_limit clause set it — presumably callers
          // initialize DefaultVal before the call; verify at call sites.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    DefaultVal = 1;
    return nullptr;
  // Non-target directives are rejected by the assert above; the explicit list
  // keeps -Wswitch coverage when new directive kinds are added.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7040 
/// Emits a host-side i32 value for the number of threads of the given
/// target-based directive, combining thread_limit, num_threads and if
/// clauses (including clauses on nested directives for plain 'target' and
/// 'target teams'). Emits 0 when no clause constrains the thread count.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' carries no thread clauses itself; inspect the single
    // nested directive of the captured body.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // thread_limit on the nested directive: emit it (with its pre-init
      // declarations) inside the captured statement context.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            // Declarations without OMPCaptureNoInit are emitted fully;
            // otherwise only the allocation and cleanups are emitted.
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams (non-distribute) directive, descend one more level to
      // find the directive that determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute directive may nest a parallel region whose
      // clauses set the thread count.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd regions execute with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit, if present, sits on this directive.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // A nested plain 'distribute' may itself nest the thread-determining
    // directive; look one level deeper.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the if clause that applies to the parallel region.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: exactly one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine with thread_limit: unsigned min(num_threads, thread_limit).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Fold in the dynamically evaluated if-clause condition.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  // Non-target directives are rejected by the assert above; the explicit list
  // keeps -Wswitch coverage when new directive kinds are added.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7259 
7260 namespace {
7261 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7262 
7263 // Utility to handle information from clauses associated with a given
7264 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7265 // It provides a convenient interface to obtain the information and generate
7266 // code for that information.
7267 class MappableExprsHandler {
7268 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These encode the map-type argument passed to the offloading
  /// runtime for each entry.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured.  It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7324 
7325   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7326   static unsigned getFlagMemberOffset() {
7327     unsigned Offset = 0;
7328     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7329          Remain = Remain >> 1)
7330       Offset++;
7331     return Offset;
7332   }
7333 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library. Immutable pair of the mapped declaration and the
  /// originating map-clause expression.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \p MapExpr may be omitted when no clause expression is associated with
    /// the mapping.
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
7350 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library. Dereferencing (operator*) yields the raw pointer value.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Returns the wrapped base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7367 
7368   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7369   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7370   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7371   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7372   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7373   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7374   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7375 
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information. The arrays are parallel: the
  /// i-th element of each array describes the same map entry.
  struct MapCombinedInfoTy {
    /// Dimensions, offsets, counts, and strides describing non-contiguous
    /// list items (used for target update).
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    /// Appends every parallel array (including the non-contiguous arrays) of
    /// \a CurInfo to this instance; \a CurInfo itself is not modified.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                 CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                    CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                   CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                    CurInfo.NonContigInfo.Strides.end());
    }
  };
7414 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map info collected for the struct ahead of its combined entry
    /// (presumably emitted first — confirm against the users of this field).
    MapCombinedInfoTy PreliminaryMapData;
    /// Field index and address of the lowest mapped element (LE above).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element (HE above).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct being mapped.
    Address Base = Address::invalid();
    /// Lower-bound address for the combined entry.
    Address LB = Address::invalid();
    /// Whether the mapped range involves an array section.
    bool IsArraySection = false;
    /// Whether the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
7430 
7431 private:
  /// Mapping information for a single mappable-expression component list,
  /// together with the clause attributes (map type, modifiers, mapper, ...)
  /// that apply to it.
  struct MapInfo {
    /// The component list this mapping information refers to.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Kind of the map clause ('to', 'from', 'tofrom', ...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers ('always', 'close', 'present', ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from 'to'/'from' clauses.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether the device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether this mapping was generated implicitly rather than written by
    /// the user.
    bool IsImplicit = false;
    /// User-defined mapper associated with this mapping, if any.
    const ValueDecl *Mapper = nullptr;
    /// Expression the mapped variable reference originates from, if any.
    const Expr *VarRef = nullptr;
    /// Whether this entry is for a device address (use_device_addr) rather
    /// than a device pointer.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7458 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression the deferred entry was created for.
    const Expr *IE = nullptr;
    /// Declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    /// True when the entry stems from use_device_addr rather than
    /// use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7471 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7495 
  /// Compute the number of bytes to be mapped for expression \p E, returned
  /// as an IR value of the target's size type. Array shaping expressions and
  /// array sections get special handling; every other expression is sized
  /// from its canonical (non-reference) type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression: the pointee size
    // multiplied by the product of all specified dimensions.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Dimensions may be of any integer type; normalize to size_t before
        // multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Determine the element size from the pointee or array-element type.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      // Remaining case: section of the form [lb:] — whole base minus the
      // first lb elements.
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp the result to zero when lb*elemsize exceeds the base size so
      // the no-unsigned-wrap subtraction below cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7570 
7571   /// Return the corresponding bits for a given map clause modifier. Add
7572   /// a flag marking the map as a pointer if requested. Add a flag marking the
7573   /// map as the first one of a series of maps that relate to the same map
7574   /// expression.
7575   OpenMPOffloadMappingFlags getMapTypeBits(
7576       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7577       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7578       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7579     OpenMPOffloadMappingFlags Bits =
7580         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7581     switch (MapType) {
7582     case OMPC_MAP_alloc:
7583     case OMPC_MAP_release:
7584       // alloc and release is the default behavior in the runtime library,  i.e.
7585       // if we don't pass any bits alloc/release that is what the runtime is
7586       // going to do. Therefore, we don't need to signal anything for these two
7587       // type modifiers.
7588       break;
7589     case OMPC_MAP_to:
7590       Bits |= OMP_MAP_TO;
7591       break;
7592     case OMPC_MAP_from:
7593       Bits |= OMP_MAP_FROM;
7594       break;
7595     case OMPC_MAP_tofrom:
7596       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7597       break;
7598     case OMPC_MAP_delete:
7599       Bits |= OMP_MAP_DELETE;
7600       break;
7601     case OMPC_MAP_unknown:
7602       llvm_unreachable("Unexpected map type!");
7603     }
7604     if (AddPtrFlag)
7605       Bits |= OMP_MAP_PTR_AND_OBJ;
7606     if (AddIsTargetParamFlag)
7607       Bits |= OMP_MAP_TARGET_PARAM;
7608     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7609       Bits |= OMP_MAP_ALWAYS;
7610     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7611       Bits |= OMP_MAP_CLOSE;
7612     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7613         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7614       Bits |= OMP_MAP_PRESENT;
7615     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7616       Bits |= OMP_MAP_OMPX_HOLD;
7617     if (IsNonContiguous)
7618       Bits |= OMP_MAP_NON_CONTIG;
7619     return Bits;
7620   }
7621 
7622   /// Return true if the provided expression is a final array section. A
7623   /// final array section, is one whose length can't be proved to be one.
7624   bool isFinalArraySectionExpression(const Expr *E) const {
7625     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7626 
7627     // It is not an array section and therefore not a unity-size one.
7628     if (!OASE)
7629       return false;
7630 
7631     // An array section with no colon always refer to a single element.
7632     if (OASE->getColonLocFirst().isInvalid())
7633       return false;
7634 
7635     const Expr *Length = OASE->getLength();
7636 
7637     // If we don't have a length we have to check if the array has size 1
7638     // for this dimension. Also, we should always expect a length if the
7639     // base type is pointer.
7640     if (!Length) {
7641       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7642                              OASE->getBase()->IgnoreParenImpCasts())
7643                              .getCanonicalType();
7644       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7645         return ATy->getSize().getSExtValue() != 1;
7646       // If we don't have a constant dimension length, we have to consider
7647       // the current section as having any size, so it is not necessarily
7648       // unitary. If it happen to be unity size, that's user fault.
7649       return true;
7650     }
7651 
7652     // Check if the length evaluates to 1.
7653     Expr::EvalResult Result;
7654     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7655       return true; // Can have more that size 1.
7656 
7657     llvm::APSInt ConstLength = Result.Val.getInt();
7658     return ConstLength.getSExtValue() != 1;
7659   }
7660 
7661   /// Generate the base pointers, section pointers, sizes, map type bits, and
7662   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7663   /// map type, map or motion modifiers, and expression components.
7664   /// \a IsFirstComponent should be set to true if the provided set of
7665   /// components is the first associated with a capture.
7666   void generateInfoForComponentList(
7667       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7668       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7669       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7670       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7671       bool IsFirstComponentList, bool IsImplicit,
7672       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7673       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7674       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7675           OverlappedElements = llvm::None) const {
7676     // The following summarizes what has to be generated for each map and the
7677     // types below. The generated information is expressed in this order:
7678     // base pointer, section pointer, size, flags
7679     // (to add to the ones that come from the map type and modifier).
7680     //
7681     // double d;
7682     // int i[100];
7683     // float *p;
7684     //
7685     // struct S1 {
7686     //   int i;
7687     //   float f[50];
7688     // }
7689     // struct S2 {
7690     //   int i;
7691     //   float f[50];
7692     //   S1 s;
7693     //   double *p;
7694     //   struct S2 *ps;
7695     //   int &ref;
7696     // }
7697     // S2 s;
7698     // S2 *ps;
7699     //
7700     // map(d)
7701     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7702     //
7703     // map(i)
7704     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7705     //
7706     // map(i[1:23])
7707     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7708     //
7709     // map(p)
7710     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7711     //
7712     // map(p[1:24])
7713     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7714     // in unified shared memory mode or for local pointers
7715     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7716     //
7717     // map(s)
7718     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7719     //
7720     // map(s.i)
7721     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7722     //
7723     // map(s.s.f)
7724     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7725     //
7726     // map(s.p)
7727     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7728     //
7729     // map(to: s.p[:22])
7730     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7731     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7732     // &(s.p), &(s.p[0]), 22*sizeof(double),
7733     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7734     // (*) alloc space for struct members, only this is a target parameter
7735     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7736     //      optimizes this entry out, same in the examples below)
7737     // (***) map the pointee (map: to)
7738     //
7739     // map(to: s.ref)
7740     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7741     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7742     // (*) alloc space for struct members, only this is a target parameter
7743     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7744     //      optimizes this entry out, same in the examples below)
7745     // (***) map the pointee (map: to)
7746     //
7747     // map(s.ps)
7748     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7749     //
7750     // map(from: s.ps->s.i)
7751     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7752     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7753     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7754     //
7755     // map(to: s.ps->ps)
7756     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7757     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7758     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7759     //
7760     // map(s.ps->ps->ps)
7761     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7762     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7763     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7764     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7765     //
7766     // map(to: s.ps->ps->s.f[:22])
7767     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7768     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7769     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7770     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7771     //
7772     // map(ps)
7773     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7774     //
7775     // map(ps->i)
7776     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7777     //
7778     // map(ps->s.f)
7779     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7780     //
7781     // map(from: ps->p)
7782     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7783     //
7784     // map(to: ps->p[:22])
7785     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7786     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7787     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7788     //
7789     // map(ps->ps)
7790     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7791     //
7792     // map(from: ps->ps->s.i)
7793     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7794     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7795     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7796     //
7797     // map(from: ps->ps->ps)
7798     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7799     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7800     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7801     //
7802     // map(ps->ps->ps->ps)
7803     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7804     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7805     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7806     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7807     //
7808     // map(to: ps->ps->ps->s.f[:22])
7809     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7810     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7811     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7812     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7813     //
7814     // map(to: s.f[:22]) map(from: s.p[:33])
7815     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7816     //     sizeof(double*) (**), TARGET_PARAM
7817     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7818     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7819     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7820     // (*) allocate contiguous space needed to fit all mapped members even if
7821     //     we allocate space for members not mapped (in this example,
7822     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7823     //     them as well because they fall between &s.f[0] and &s.p)
7824     //
7825     // map(from: s.f[:22]) map(to: ps->p[:33])
7826     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7827     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7828     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7829     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7830     // (*) the struct this entry pertains to is the 2nd element in the list of
7831     //     arguments, hence MEMBER_OF(2)
7832     //
7833     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7834     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7835     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7836     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7837     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7838     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7839     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7840     // (*) the struct this entry pertains to is the 4th element in the list
7841     //     of arguments, hence MEMBER_OF(4)
7842 
7843     // Track if the map information being generated is the first for a capture.
7844     bool IsCaptureFirstInfo = IsFirstComponentList;
7845     // When the variable is on a declare target link or in a to clause with
7846     // unified memory, a reference is needed to hold the host/device address
7847     // of the variable.
7848     bool RequiresReference = false;
7849 
7850     // Scan the components from the base to the complete expression.
7851     auto CI = Components.rbegin();
7852     auto CE = Components.rend();
7853     auto I = CI;
7854 
7855     // Track if the map information being generated is the first for a list of
7856     // components.
7857     bool IsExpressionFirstInfo = true;
7858     bool FirstPointerInComplexData = false;
7859     Address BP = Address::invalid();
7860     const Expr *AssocExpr = I->getAssociatedExpression();
7861     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7862     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7863     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7864 
7865     if (isa<MemberExpr>(AssocExpr)) {
7866       // The base is the 'this' pointer. The content of the pointer is going
7867       // to be the base of the field being mapped.
7868       BP = CGF.LoadCXXThisAddress();
7869     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7870                (OASE &&
7871                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7872       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7873     } else if (OAShE &&
7874                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7875       BP = Address(
7876           CGF.EmitScalarExpr(OAShE->getBase()),
7877           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7878     } else {
7879       // The base is the reference to the variable.
7880       // BP = &Var.
7881       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7882       if (const auto *VD =
7883               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7884         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7885                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7886           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7887               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7888                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7889             RequiresReference = true;
7890             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7891           }
7892         }
7893       }
7894 
7895       // If the variable is a pointer and is being dereferenced (i.e. is not
7896       // the last component), the base has to be the pointer itself, not its
7897       // reference. References are ignored for mapping purposes.
7898       QualType Ty =
7899           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7900       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7901         // No need to generate individual map information for the pointer, it
7902         // can be associated with the combined storage if shared memory mode is
7903         // active or the base declaration is not global variable.
7904         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7905         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7906             !VD || VD->hasLocalStorage())
7907           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7908         else
7909           FirstPointerInComplexData = true;
7910         ++I;
7911       }
7912     }
7913 
7914     // Track whether a component of the list should be marked as MEMBER_OF some
7915     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7916     // in a component list should be marked as MEMBER_OF, all subsequent entries
7917     // do not belong to the base struct. E.g.
7918     // struct S2 s;
7919     // s.ps->ps->ps->f[:]
7920     //   (1) (2) (3) (4)
7921     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7922     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7923     // is the pointee of ps(2) which is not member of struct s, so it should not
7924     // be marked as such (it is still PTR_AND_OBJ).
7925     // The variable is initialized to false so that PTR_AND_OBJ entries which
7926     // are not struct members are not considered (e.g. array of pointers to
7927     // data).
7928     bool ShouldBeMemberOf = false;
7929 
7930     // Variable keeping track of whether or not we have encountered a component
7931     // in the component list which is a member expression. Useful when we have a
7932     // pointer or a final array section, in which case it is the previous
7933     // component in the list which tells us whether we have a member expression.
7934     // E.g. X.f[:]
7935     // While processing the final array section "[:]" it is "f" which tells us
7936     // whether we are dealing with a member of a declared struct.
7937     const MemberExpr *EncounteredME = nullptr;
7938 
7939     // Track for the total number of dimension. Start from one for the dummy
7940     // dimension.
7941     uint64_t DimSize = 1;
7942 
7943     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7944     bool IsPrevMemberReference = false;
7945 
7946     for (; I != CE; ++I) {
7947       // If the current component is member of a struct (parent struct) mark it.
7948       if (!EncounteredME) {
7949         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7950         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7951         // as MEMBER_OF the parent struct.
7952         if (EncounteredME) {
7953           ShouldBeMemberOf = true;
7954           // Do not emit as complex pointer if this is actually not array-like
7955           // expression.
7956           if (FirstPointerInComplexData) {
7957             QualType Ty = std::prev(I)
7958                               ->getAssociatedDeclaration()
7959                               ->getType()
7960                               .getNonReferenceType();
7961             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7962             FirstPointerInComplexData = false;
7963           }
7964         }
7965       }
7966 
7967       auto Next = std::next(I);
7968 
7969       // We need to generate the addresses and sizes if this is the last
7970       // component, if the component is a pointer or if it is an array section
7971       // whose length can't be proved to be one. If this is a pointer, it
7972       // becomes the base address for the following components.
7973 
7974       // A final array section, is one whose length can't be proved to be one.
7975       // If the map item is non-contiguous then we don't treat any array section
7976       // as final array section.
7977       bool IsFinalArraySection =
7978           !IsNonContiguous &&
7979           isFinalArraySectionExpression(I->getAssociatedExpression());
7980 
7981       // If we have a declaration for the mapping use that, otherwise use
7982       // the base declaration of the map clause.
7983       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7984                                      ? I->getAssociatedDeclaration()
7985                                      : BaseDecl;
7986       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7987                                                : MapExpr;
7988 
7989       // Get information on whether the element is a pointer. Have to do a
7990       // special treatment for array sections given that they are built-in
7991       // types.
7992       const auto *OASE =
7993           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7994       const auto *OAShE =
7995           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7996       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7997       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7998       bool IsPointer =
7999           OAShE ||
8000           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8001                        .getCanonicalType()
8002                        ->isAnyPointerType()) ||
8003           I->getAssociatedExpression()->getType()->isAnyPointerType();
8004       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8005                                MapDecl &&
8006                                MapDecl->getType()->isLValueReferenceType();
8007       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8008 
8009       if (OASE)
8010         ++DimSize;
8011 
8012       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8013           IsFinalArraySection) {
8014         // If this is not the last component, we expect the pointer to be
8015         // associated with an array expression or member expression.
8016         assert((Next == CE ||
8017                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8018                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8019                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8020                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8021                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8022                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8023                "Unexpected expression");
8024 
8025         Address LB = Address::invalid();
8026         Address LowestElem = Address::invalid();
8027         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8028                                        const MemberExpr *E) {
8029           const Expr *BaseExpr = E->getBase();
8030           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8031           // scalar.
8032           LValue BaseLV;
8033           if (E->isArrow()) {
8034             LValueBaseInfo BaseInfo;
8035             TBAAAccessInfo TBAAInfo;
8036             Address Addr =
8037                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8038             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8039             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8040           } else {
8041             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8042           }
8043           return BaseLV;
8044         };
8045         if (OAShE) {
8046           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8047                                     CGF.getContext().getTypeAlignInChars(
8048                                         OAShE->getBase()->getType()));
8049         } else if (IsMemberReference) {
8050           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8051           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8052           LowestElem = CGF.EmitLValueForFieldInitialization(
8053                               BaseLVal, cast<FieldDecl>(MapDecl))
8054                            .getAddress(CGF);
8055           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8056                    .getAddress(CGF);
8057         } else {
8058           LowestElem = LB =
8059               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8060                   .getAddress(CGF);
8061         }
8062 
8063         // If this component is a pointer inside the base struct then we don't
8064         // need to create any entry for it - it will be combined with the object
8065         // it is pointing to into a single PTR_AND_OBJ entry.
8066         bool IsMemberPointerOrAddr =
8067             EncounteredME &&
8068             (((IsPointer || ForDeviceAddr) &&
8069               I->getAssociatedExpression() == EncounteredME) ||
8070              (IsPrevMemberReference && !IsPointer) ||
8071              (IsMemberReference && Next != CE &&
8072               !Next->getAssociatedExpression()->getType()->isPointerType()));
8073         if (!OverlappedElements.empty() && Next == CE) {
8074           // Handle base element with the info for overlapped elements.
8075           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8076           assert(!IsPointer &&
8077                  "Unexpected base element with the pointer type.");
8078           // Mark the whole struct as the struct that requires allocation on the
8079           // device.
8080           PartialStruct.LowestElem = {0, LowestElem};
8081           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8082               I->getAssociatedExpression()->getType());
8083           Address HB = CGF.Builder.CreateConstGEP(
8084               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8085                                                               CGF.VoidPtrTy),
8086               TypeSize.getQuantity() - 1);
8087           PartialStruct.HighestElem = {
8088               std::numeric_limits<decltype(
8089                   PartialStruct.HighestElem.first)>::max(),
8090               HB};
8091           PartialStruct.Base = BP;
8092           PartialStruct.LB = LB;
8093           assert(
8094               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8095               "Overlapped elements must be used only once for the variable.");
8096           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8097           // Emit data for non-overlapped data.
8098           OpenMPOffloadMappingFlags Flags =
8099               OMP_MAP_MEMBER_OF |
8100               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8101                              /*AddPtrFlag=*/false,
8102                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8103           llvm::Value *Size = nullptr;
8104           // Do bitcopy of all non-overlapped structure elements.
8105           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8106                    Component : OverlappedElements) {
8107             Address ComponentLB = Address::invalid();
8108             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8109                  Component) {
8110               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8111                 const auto *FD = dyn_cast<FieldDecl>(VD);
8112                 if (FD && FD->getType()->isLValueReferenceType()) {
8113                   const auto *ME =
8114                       cast<MemberExpr>(MC.getAssociatedExpression());
8115                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8116                   ComponentLB =
8117                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8118                           .getAddress(CGF);
8119                 } else {
8120                   ComponentLB =
8121                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8122                           .getAddress(CGF);
8123                 }
8124                 Size = CGF.Builder.CreatePtrDiff(
8125                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8126                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8127                 break;
8128               }
8129             }
8130             assert(Size && "Failed to determine structure size");
8131             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8132             CombinedInfo.BasePointers.push_back(BP.getPointer());
8133             CombinedInfo.Pointers.push_back(LB.getPointer());
8134             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8135                 Size, CGF.Int64Ty, /*isSigned=*/true));
8136             CombinedInfo.Types.push_back(Flags);
8137             CombinedInfo.Mappers.push_back(nullptr);
8138             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8139                                                                       : 1);
8140             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8141           }
8142           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8143           CombinedInfo.BasePointers.push_back(BP.getPointer());
8144           CombinedInfo.Pointers.push_back(LB.getPointer());
8145           Size = CGF.Builder.CreatePtrDiff(
8146               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8147               CGF.EmitCastToVoidPtr(LB.getPointer()));
8148           CombinedInfo.Sizes.push_back(
8149               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8150           CombinedInfo.Types.push_back(Flags);
8151           CombinedInfo.Mappers.push_back(nullptr);
8152           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8153                                                                     : 1);
8154           break;
8155         }
8156         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8157         if (!IsMemberPointerOrAddr ||
8158             (Next == CE && MapType != OMPC_MAP_unknown)) {
8159           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8160           CombinedInfo.BasePointers.push_back(BP.getPointer());
8161           CombinedInfo.Pointers.push_back(LB.getPointer());
8162           CombinedInfo.Sizes.push_back(
8163               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8164           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8165                                                                     : 1);
8166 
8167           // If Mapper is valid, the last component inherits the mapper.
8168           bool HasMapper = Mapper && Next == CE;
8169           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8170 
8171           // We need to add a pointer flag for each map that comes from the
8172           // same expression except for the first one. We also need to signal
8173           // this map is the first one that relates with the current capture
8174           // (there is a set of entries for each capture).
8175           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8176               MapType, MapModifiers, MotionModifiers, IsImplicit,
8177               !IsExpressionFirstInfo || RequiresReference ||
8178                   FirstPointerInComplexData || IsMemberReference,
8179               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8180 
8181           if (!IsExpressionFirstInfo || IsMemberReference) {
8182             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8183             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8184             if (IsPointer || (IsMemberReference && Next != CE))
8185               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8186                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8187 
8188             if (ShouldBeMemberOf) {
8189               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8190               // should be later updated with the correct value of MEMBER_OF.
8191               Flags |= OMP_MAP_MEMBER_OF;
8192               // From now on, all subsequent PTR_AND_OBJ entries should not be
8193               // marked as MEMBER_OF.
8194               ShouldBeMemberOf = false;
8195             }
8196           }
8197 
8198           CombinedInfo.Types.push_back(Flags);
8199         }
8200 
8201         // If we have encountered a member expression so far, keep track of the
8202         // mapped member. If the parent is "*this", then the value declaration
8203         // is nullptr.
8204         if (EncounteredME) {
8205           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8206           unsigned FieldIndex = FD->getFieldIndex();
8207 
8208           // Update info about the lowest and highest elements for this struct
8209           if (!PartialStruct.Base.isValid()) {
8210             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8211             if (IsFinalArraySection) {
8212               Address HB =
8213                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8214                       .getAddress(CGF);
8215               PartialStruct.HighestElem = {FieldIndex, HB};
8216             } else {
8217               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8218             }
8219             PartialStruct.Base = BP;
8220             PartialStruct.LB = BP;
8221           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8222             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8223           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8224             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8225           }
8226         }
8227 
8228         // Need to emit combined struct for array sections.
8229         if (IsFinalArraySection || IsNonContiguous)
8230           PartialStruct.IsArraySection = true;
8231 
8232         // If we have a final array section, we are done with this expression.
8233         if (IsFinalArraySection)
8234           break;
8235 
8236         // The pointer becomes the base for the next element.
8237         if (Next != CE)
8238           BP = IsMemberReference ? LowestElem : LB;
8239 
8240         IsExpressionFirstInfo = false;
8241         IsCaptureFirstInfo = false;
8242         FirstPointerInComplexData = false;
8243         IsPrevMemberReference = IsMemberReference;
8244       } else if (FirstPointerInComplexData) {
8245         QualType Ty = Components.rbegin()
8246                           ->getAssociatedDeclaration()
8247                           ->getType()
8248                           .getNonReferenceType();
8249         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8250         FirstPointerInComplexData = false;
8251       }
8252     }
8253     // If ran into the whole component - allocate the space for the whole
8254     // record.
8255     if (!EncounteredME)
8256       PartialStruct.HasCompleteRecord = true;
8257 
8258     if (!IsNonContiguous)
8259       return;
8260 
8261     const ASTContext &Context = CGF.getContext();
8262 
8263     // For supporting stride in array section, we need to initialize the first
8264     // dimension size as 1, first offset as 0, and first count as 1
8265     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8266     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8267     MapValuesArrayTy CurStrides;
8268     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8269     uint64_t ElementTypeSize;
8270 
8271     // Collect Size information for each dimension and get the element size as
8272     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8274     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8275          Components) {
8276       const Expr *AssocExpr = Component.getAssociatedExpression();
8277       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8278 
8279       if (!OASE)
8280         continue;
8281 
8282       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8283       auto *CAT = Context.getAsConstantArrayType(Ty);
8284       auto *VAT = Context.getAsVariableArrayType(Ty);
8285 
8286       // We need all the dimension size except for the last dimension.
8287       assert((VAT || CAT || &Component == &*Components.begin()) &&
8288              "Should be either ConstantArray or VariableArray if not the "
8289              "first Component");
8290 
8291       // Get element size if CurStrides is empty.
8292       if (CurStrides.empty()) {
8293         const Type *ElementType = nullptr;
8294         if (CAT)
8295           ElementType = CAT->getElementType().getTypePtr();
8296         else if (VAT)
8297           ElementType = VAT->getElementType().getTypePtr();
8298         else
8299           assert(&Component == &*Components.begin() &&
8300                  "Only expect pointer (non CAT or VAT) when this is the "
8301                  "first Component");
8302         // If ElementType is null, then it means the base is a pointer
8303         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8304         // for next iteration.
8305         if (ElementType) {
8306           // For the case that having pointer as base, we need to remove one
8307           // level of indirection.
8308           if (&Component != &*Components.begin())
8309             ElementType = ElementType->getPointeeOrArrayElementType();
8310           ElementTypeSize =
8311               Context.getTypeSizeInChars(ElementType).getQuantity();
8312           CurStrides.push_back(
8313               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8314         }
8315       }
8316       // Get dimension value except for the last dimension since we don't need
8317       // it.
8318       if (DimSizes.size() < Components.size() - 1) {
8319         if (CAT)
8320           DimSizes.push_back(llvm::ConstantInt::get(
8321               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8322         else if (VAT)
8323           DimSizes.push_back(CGF.Builder.CreateIntCast(
8324               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8325               /*IsSigned=*/false));
8326       }
8327     }
8328 
    // Skip the dummy dimension since we already have its information.
8330     auto DI = DimSizes.begin() + 1;
8331     // Product of dimension.
8332     llvm::Value *DimProd =
8333         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8334 
8335     // Collect info for non-contiguous. Notice that offset, count, and stride
8336     // are only meaningful for array-section, so we insert a null for anything
8337     // other than array-section.
8338     // Also, the size of offset, count, and stride are not the same as
8339     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8340     // count, and stride are the same as the number of non-contiguous
8341     // declaration in target update to/from clause.
8342     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8343          Components) {
8344       const Expr *AssocExpr = Component.getAssociatedExpression();
8345 
8346       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8347         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8348             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8349             /*isSigned=*/false);
8350         CurOffsets.push_back(Offset);
8351         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8352         CurStrides.push_back(CurStrides.back());
8353         continue;
8354       }
8355 
8356       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8357 
8358       if (!OASE)
8359         continue;
8360 
8361       // Offset
8362       const Expr *OffsetExpr = OASE->getLowerBound();
8363       llvm::Value *Offset = nullptr;
8364       if (!OffsetExpr) {
8365         // If offset is absent, then we just set it to zero.
8366         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8367       } else {
8368         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8369                                            CGF.Int64Ty,
8370                                            /*isSigned=*/false);
8371       }
8372       CurOffsets.push_back(Offset);
8373 
8374       // Count
8375       const Expr *CountExpr = OASE->getLength();
8376       llvm::Value *Count = nullptr;
8377       if (!CountExpr) {
8378         // In Clang, once a high dimension is an array section, we construct all
8379         // the lower dimension as array section, however, for case like
8380         // arr[0:2][2], Clang construct the inner dimension as an array section
8381         // but it actually is not in an array section form according to spec.
8382         if (!OASE->getColonLocFirst().isValid() &&
8383             !OASE->getColonLocSecond().isValid()) {
8384           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8385         } else {
8386           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8387           // When the length is absent it defaults to ⌈(size −
8388           // lower-bound)/stride⌉, where size is the size of the array
8389           // dimension.
8390           const Expr *StrideExpr = OASE->getStride();
8391           llvm::Value *Stride =
8392               StrideExpr
8393                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8394                                               CGF.Int64Ty, /*isSigned=*/false)
8395                   : nullptr;
8396           if (Stride)
8397             Count = CGF.Builder.CreateUDiv(
8398                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8399           else
8400             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8401         }
8402       } else {
8403         Count = CGF.EmitScalarExpr(CountExpr);
8404       }
8405       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8406       CurCounts.push_back(Count);
8407 
8408       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8409       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8410       //              Offset      Count     Stride
8411       //    D0          0           1         4    (int)    <- dummy dimension
8412       //    D1          0           2         8    (2 * (1) * 4)
8413       //    D2          1           2         20   (1 * (1 * 5) * 4)
8414       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8415       const Expr *StrideExpr = OASE->getStride();
8416       llvm::Value *Stride =
8417           StrideExpr
8418               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8419                                           CGF.Int64Ty, /*isSigned=*/false)
8420               : nullptr;
8421       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8422       if (Stride)
8423         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8424       else
8425         CurStrides.push_back(DimProd);
8426       if (DI != DimSizes.end())
8427         ++DI;
8428     }
8429 
8430     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8431     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8432     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8433   }
8434 
8435   /// Return the adjusted map modifiers if the declaration a capture refers to
8436   /// appears in a first-private clause. This is expected to be used only with
8437   /// directives that start with 'target'.
8438   MappableExprsHandler::OpenMPOffloadMappingFlags
8439   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8440     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8441 
8442     // A first private variable captured by reference will use only the
8443     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8444     // declaration is known as first-private in this handler.
8445     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8446       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8447         return MappableExprsHandler::OMP_MAP_TO |
8448                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8449       return MappableExprsHandler::OMP_MAP_PRIVATE |
8450              MappableExprsHandler::OMP_MAP_TO;
8451     }
8452     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8453     if (I != LambdasMap.end())
8454       // for map(to: lambda): using user specified map type.
8455       return getMapTypeBits(
8456           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8457           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8458           /*AddPtrFlag=*/false,
8459           /*AddIsTargetParamFlag=*/false,
8460           /*isNonContiguous=*/false);
8461     return MappableExprsHandler::OMP_MAP_TO |
8462            MappableExprsHandler::OMP_MAP_FROM;
8463   }
8464 
8465   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8466     // Rotate by getFlagMemberOffset() bits.
8467     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8468                                                   << getFlagMemberOffset());
8469   }
8470 
8471   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8472                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8473     // If the entry is PTR_AND_OBJ but has not been marked with the special
8474     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8475     // marked as MEMBER_OF.
8476     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8477         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8478       return;
8479 
8480     // Reset the placeholder value to prepare the flag for the assignment of the
8481     // proper MEMBER_OF value.
8482     Flags &= ~OMP_MAP_MEMBER_OF;
8483     Flags |= MemberOfFlag;
8484   }
8485 
  /// Flatten the LLVM struct layout of \p RD into \p Layout as an ordered
  /// list of fields, recursing into non-empty (virtual and non-virtual)
  /// bases. \p AsBase selects the base-subobject LLVM type (no tail padding)
  /// when \p RD is being laid out as a base class of another record.
  /// Bitfields and zero-size fields are omitted.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    // Pick the LLVM type matching how this record is used: as a base
    // subobject or as a complete object.
    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot holds either a base class
    // or a field. Slots left null (padding, bitfield storage) are skipped in
    // the emission loop below.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A slot may already be occupied by a non-virtual base recorded above;
      // keep the first occupant.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in layout order, recursing into base classes so the
    // final list contains only fields.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8545 
8546   /// Generate all the base pointers, section pointers, sizes, map types, and
8547   /// mappers for the extracted mappable expressions (all included in \a
8548   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8549   /// pair of the relevant declaration and index where it occurs is appended to
8550   /// the device pointers info array.
8551   void generateAllInfoForClauses(
8552       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8553       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8554           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8555     // We have to process the component lists that relate with the same
8556     // declaration in a single chunk so that we can generate the map flags
8557     // correctly. Therefore, we organize all lists in a map.
8558     enum MapKind { Present, Allocs, Other, Total };
8559     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8560                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8561         Info;
8562 
8563     // Helper function to fill the information map for the different supported
8564     // clauses.
8565     auto &&InfoGen =
8566         [&Info, &SkipVarSet](
8567             const ValueDecl *D, MapKind Kind,
8568             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8569             OpenMPMapClauseKind MapType,
8570             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8571             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8572             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8573             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8574           if (SkipVarSet.contains(D))
8575             return;
8576           auto It = Info.find(D);
8577           if (It == Info.end())
8578             It = Info
8579                      .insert(std::make_pair(
8580                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8581                      .first;
8582           It->second[Kind].emplace_back(
8583               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8584               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8585         };
8586 
8587     for (const auto *Cl : Clauses) {
8588       const auto *C = dyn_cast<OMPMapClause>(Cl);
8589       if (!C)
8590         continue;
8591       MapKind Kind = Other;
8592       if (llvm::is_contained(C->getMapTypeModifiers(),
8593                              OMPC_MAP_MODIFIER_present))
8594         Kind = Present;
8595       else if (C->getMapType() == OMPC_MAP_alloc)
8596         Kind = Allocs;
8597       const auto *EI = C->getVarRefs().begin();
8598       for (const auto L : C->component_lists()) {
8599         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8600         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8601                 C->getMapTypeModifiers(), llvm::None,
8602                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8603                 E);
8604         ++EI;
8605       }
8606     }
8607     for (const auto *Cl : Clauses) {
8608       const auto *C = dyn_cast<OMPToClause>(Cl);
8609       if (!C)
8610         continue;
8611       MapKind Kind = Other;
8612       if (llvm::is_contained(C->getMotionModifiers(),
8613                              OMPC_MOTION_MODIFIER_present))
8614         Kind = Present;
8615       const auto *EI = C->getVarRefs().begin();
8616       for (const auto L : C->component_lists()) {
8617         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8618                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8619                 C->isImplicit(), std::get<2>(L), *EI);
8620         ++EI;
8621       }
8622     }
8623     for (const auto *Cl : Clauses) {
8624       const auto *C = dyn_cast<OMPFromClause>(Cl);
8625       if (!C)
8626         continue;
8627       MapKind Kind = Other;
8628       if (llvm::is_contained(C->getMotionModifiers(),
8629                              OMPC_MOTION_MODIFIER_present))
8630         Kind = Present;
8631       const auto *EI = C->getVarRefs().begin();
8632       for (const auto L : C->component_lists()) {
8633         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8634                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8635                 C->isImplicit(), std::get<2>(L), *EI);
8636         ++EI;
8637       }
8638     }
8639 
8640     // Look at the use_device_ptr clause information and mark the existing map
8641     // entries as such. If there is no map information for an entry in the
8642     // use_device_ptr list, we create one with map type 'alloc' and zero size
8643     // section. It is the user fault if that was not mapped before. If there is
8644     // no map information and the pointer is a struct member, then we defer the
8645     // emission of that entry until the whole struct has been processed.
8646     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8647                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8648         DeferredInfo;
8649     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8650 
8651     for (const auto *Cl : Clauses) {
8652       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8653       if (!C)
8654         continue;
8655       for (const auto L : C->component_lists()) {
8656         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8657             std::get<1>(L);
8658         assert(!Components.empty() &&
8659                "Not expecting empty list of components!");
8660         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8661         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8662         const Expr *IE = Components.back().getAssociatedExpression();
8663         // If the first component is a member expression, we have to look into
8664         // 'this', which maps to null in the map of map information. Otherwise
8665         // look directly for the information.
8666         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8667 
8668         // We potentially have map information for this declaration already.
8669         // Look for the first set of components that refer to it.
8670         if (It != Info.end()) {
8671           bool Found = false;
8672           for (auto &Data : It->second) {
8673             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8674               return MI.Components.back().getAssociatedDeclaration() == VD;
8675             });
8676             // If we found a map entry, signal that the pointer has to be
8677             // returned and move on to the next declaration. Exclude cases where
8678             // the base pointer is mapped as array subscript, array section or
8679             // array shaping. The base address is passed as a pointer to base in
8680             // this case and cannot be used as a base for use_device_ptr list
8681             // item.
8682             if (CI != Data.end()) {
8683               auto PrevCI = std::next(CI->Components.rbegin());
8684               const auto *VarD = dyn_cast<VarDecl>(VD);
8685               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8686                   isa<MemberExpr>(IE) ||
8687                   !VD->getType().getNonReferenceType()->isPointerType() ||
8688                   PrevCI == CI->Components.rend() ||
8689                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8690                   VarD->hasLocalStorage()) {
8691                 CI->ReturnDevicePointer = true;
8692                 Found = true;
8693                 break;
8694               }
8695             }
8696           }
8697           if (Found)
8698             continue;
8699         }
8700 
8701         // We didn't find any match in our map information - generate a zero
8702         // size array section - if the pointer is a struct member we defer this
8703         // action until the whole struct has been processed.
8704         if (isa<MemberExpr>(IE)) {
8705           // Insert the pointer into Info to be processed by
8706           // generateInfoForComponentList. Because it is a member pointer
8707           // without a pointee, no entry will be generated for it, therefore
8708           // we need to generate one after the whole struct has been processed.
8709           // Nonetheless, generateInfoForComponentList must be called to take
8710           // the pointer into account for the calculation of the range of the
8711           // partial struct.
8712           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8713                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8714                   nullptr);
8715           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8716         } else {
8717           llvm::Value *Ptr =
8718               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8719           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8720           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8721           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8722           UseDevicePtrCombinedInfo.Sizes.push_back(
8723               llvm::Constant::getNullValue(CGF.Int64Ty));
8724           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8725           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8726         }
8727       }
8728     }
8729 
8730     // Look at the use_device_addr clause information and mark the existing map
8731     // entries as such. If there is no map information for an entry in the
8732     // use_device_addr list, we create one with map type 'alloc' and zero size
8733     // section. It is the user fault if that was not mapped before. If there is
8734     // no map information and the pointer is a struct member, then we defer the
8735     // emission of that entry until the whole struct has been processed.
8736     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8737     for (const auto *Cl : Clauses) {
8738       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8739       if (!C)
8740         continue;
8741       for (const auto L : C->component_lists()) {
8742         assert(!std::get<1>(L).empty() &&
8743                "Not expecting empty list of components!");
8744         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8745         if (!Processed.insert(VD).second)
8746           continue;
8747         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8748         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8749         // If the first component is a member expression, we have to look into
8750         // 'this', which maps to null in the map of map information. Otherwise
8751         // look directly for the information.
8752         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8753 
8754         // We potentially have map information for this declaration already.
8755         // Look for the first set of components that refer to it.
8756         if (It != Info.end()) {
8757           bool Found = false;
8758           for (auto &Data : It->second) {
8759             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8760               return MI.Components.back().getAssociatedDeclaration() == VD;
8761             });
8762             // If we found a map entry, signal that the pointer has to be
8763             // returned and move on to the next declaration.
8764             if (CI != Data.end()) {
8765               CI->ReturnDevicePointer = true;
8766               Found = true;
8767               break;
8768             }
8769           }
8770           if (Found)
8771             continue;
8772         }
8773 
8774         // We didn't find any match in our map information - generate a zero
8775         // size array section - if the pointer is a struct member we defer this
8776         // action until the whole struct has been processed.
8777         if (isa<MemberExpr>(IE)) {
8778           // Insert the pointer into Info to be processed by
8779           // generateInfoForComponentList. Because it is a member pointer
8780           // without a pointee, no entry will be generated for it, therefore
8781           // we need to generate one after the whole struct has been processed.
8782           // Nonetheless, generateInfoForComponentList must be called to take
8783           // the pointer into account for the calculation of the range of the
8784           // partial struct.
8785           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8786                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8787                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8788           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8789         } else {
8790           llvm::Value *Ptr;
8791           if (IE->isGLValue())
8792             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8793           else
8794             Ptr = CGF.EmitScalarExpr(IE);
8795           CombinedInfo.Exprs.push_back(VD);
8796           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8797           CombinedInfo.Pointers.push_back(Ptr);
8798           CombinedInfo.Sizes.push_back(
8799               llvm::Constant::getNullValue(CGF.Int64Ty));
8800           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8801           CombinedInfo.Mappers.push_back(nullptr);
8802         }
8803       }
8804     }
8805 
8806     for (const auto &Data : Info) {
8807       StructRangeInfoTy PartialStruct;
8808       // Temporary generated information.
8809       MapCombinedInfoTy CurInfo;
8810       const Decl *D = Data.first;
8811       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8812       for (const auto &M : Data.second) {
8813         for (const MapInfo &L : M) {
8814           assert(!L.Components.empty() &&
8815                  "Not expecting declaration with no component lists.");
8816 
8817           // Remember the current base pointer index.
8818           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8819           CurInfo.NonContigInfo.IsNonContiguous =
8820               L.Components.back().isNonContiguous();
8821           generateInfoForComponentList(
8822               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8823               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8824               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8825 
8826           // If this entry relates with a device pointer, set the relevant
8827           // declaration and add the 'return pointer' flag.
8828           if (L.ReturnDevicePointer) {
8829             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8830                    "Unexpected number of mapped base pointers.");
8831 
8832             const ValueDecl *RelevantVD =
8833                 L.Components.back().getAssociatedDeclaration();
8834             assert(RelevantVD &&
8835                    "No relevant declaration related with device pointer??");
8836 
8837             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8838                 RelevantVD);
8839             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8840           }
8841         }
8842       }
8843 
8844       // Append any pending zero-length pointers which are struct members and
8845       // used with use_device_ptr or use_device_addr.
8846       auto CI = DeferredInfo.find(Data.first);
8847       if (CI != DeferredInfo.end()) {
8848         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8849           llvm::Value *BasePtr;
8850           llvm::Value *Ptr;
8851           if (L.ForDeviceAddr) {
8852             if (L.IE->isGLValue())
8853               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8854             else
8855               Ptr = this->CGF.EmitScalarExpr(L.IE);
8856             BasePtr = Ptr;
8857             // Entry is RETURN_PARAM. Also, set the placeholder value
8858             // MEMBER_OF=FFFF so that the entry is later updated with the
8859             // correct value of MEMBER_OF.
8860             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8861           } else {
8862             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8863             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8864                                              L.IE->getExprLoc());
8865             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8866             // placeholder value MEMBER_OF=FFFF so that the entry is later
8867             // updated with the correct value of MEMBER_OF.
8868             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8869                                     OMP_MAP_MEMBER_OF);
8870           }
8871           CurInfo.Exprs.push_back(L.VD);
8872           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8873           CurInfo.Pointers.push_back(Ptr);
8874           CurInfo.Sizes.push_back(
8875               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8876           CurInfo.Mappers.push_back(nullptr);
8877         }
8878       }
8879       // If there is an entry in PartialStruct it means we have a struct with
8880       // individual members mapped. Emit an extra combined entry.
8881       if (PartialStruct.Base.isValid()) {
8882         CurInfo.NonContigInfo.Dims.push_back(0);
8883         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8884       }
8885 
8886       // We need to append the results of this capture to what we already
8887       // have.
8888       CombinedInfo.append(CurInfo);
8889     }
8890     // Append data for use_device_ptr clauses.
8891     CombinedInfo.append(UseDevicePtrCombinedInfo);
8892   }
8893 
8894 public:
8895   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8896       : CurDir(&Dir), CGF(CGF) {
8897     // Extract firstprivate clause information.
8898     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8899       for (const auto *D : C->varlists())
8900         FirstPrivateDecls.try_emplace(
8901             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8902     // Extract implicit firstprivates from uses_allocators clauses.
8903     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8904       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8905         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8906         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8907           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8908                                         /*Implicit=*/true);
8909         else if (const auto *VD = dyn_cast<VarDecl>(
8910                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8911                          ->getDecl()))
8912           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8913       }
8914     }
8915     // Extract device pointer clause information.
8916     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8917       for (auto L : C->component_lists())
8918         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8919     // Extract map information.
8920     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8921       if (C->getMapType() != OMPC_MAP_to)
8922         continue;
8923       for (auto L : C->component_lists()) {
8924         const ValueDecl *VD = std::get<0>(L);
8925         const auto *RD = VD ? VD->getType()
8926                                   .getCanonicalType()
8927                                   .getNonReferenceType()
8928                                   ->getAsCXXRecordDecl()
8929                             : nullptr;
8930         if (RD && RD->isLambda())
8931           LambdasMap.try_emplace(std::get<0>(L), C);
8932       }
8933     }
8934   }
8935 
  /// Constructor for the declare mapper directive. Only records the directive
  /// itself; unlike the executable-directive constructor, no clause
  /// information is pre-extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8939 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Output arrays; one combined entry spanning the mapped
  ///        range of the struct is appended to them.
  /// \param CurTypes Map flags of the member entries already generated for
  ///        this struct; adjusted in place (TARGET_PARAM removal, OMPX_HOLD
  ///        propagation, MEMBER_OF placement).
  /// \param PartialStruct Range info gathered while mapping the members:
  ///        struct base plus lowest/highest mapped element.
  /// \param VD The declaration the combined entry is attributed to, if any.
  /// \param NotTargetParams If true, the combined entry is not marked as a
  ///        kernel argument (no OMP_MAP_TARGET_PARAM).
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither a struct member nor an array section
    // needs no combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // If the whole record is mapped, both bounds collapse to the record start;
    // the +1 GEP below then covers one whole element of that address.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // When generating info for captures (NotTargetParams == false) the
    // combined entry is the kernel argument, so it carries TARGET_PARAM;
    // otherwise it starts with no flags.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // (pushed above) now represents the argument instead.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
9011 
9012   /// Generate all the base pointers, section pointers, sizes, map types, and
9013   /// mappers for the extracted mappable expressions (all included in \a
9014   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9015   /// pair of the relevant declaration and index where it occurs is appended to
9016   /// the device pointers info array.
9017   void generateAllInfo(
9018       MapCombinedInfoTy &CombinedInfo,
9019       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9020           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9021     assert(CurDir.is<const OMPExecutableDirective *>() &&
9022            "Expect a executable directive");
9023     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9024     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9025   }
9026 
9027   /// Generate all the base pointers, section pointers, sizes, map types, and
9028   /// mappers for the extracted map clauses of user-defined mapper (all included
9029   /// in \a CombinedInfo).
9030   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9031     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9032            "Expect a declare mapper directive");
9033     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9034     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9035   }
9036 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is a lambda object, appends one PTR_AND_OBJ|LITERAL|MEMBER_OF|
  /// IMPLICIT entry for the captured 'this' (if present) and one for each
  /// variable captured by reference or of pointer type.
  ///
  /// \param VD Declaration whose type may be a lambda closure.
  /// \param Arg Host pointer to the lambda object.
  /// \param CombinedInfo Output map arrays; entries are appended.
  /// \param LambdaPointers Output: for every emitted capture entry, maps the
  ///        capture field's address back to the lambda object's address, for
  ///        later use by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Nothing to do unless the declaration is really a lambda closure type.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Emit an entry for the captured 'this' pointer, if any.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    // Emit one entry per capture that is by-reference or of pointer type.
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: deliberately shadows the outer VD — entries below are
      // attributed to the captured variable, not the lambda object.
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced storage with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-value pointer capture: pass the pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9103 
9104   /// Set correct indices for lambdas captures.
9105   void adjustMemberOfForLambdaCaptures(
9106       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9107       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9108       MapFlagsArrayTy &Types) const {
9109     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9110       // Set correct member_of idx for all implicit lambda captures.
9111       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9112                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9113         continue;
9114       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9115       assert(BasePtr && "Unable to find base lambda address.");
9116       int TgtIdx = -1;
9117       for (unsigned J = I; J > 0; --J) {
9118         unsigned Idx = J - 1;
9119         if (Pointers[Idx] != BasePtr)
9120           continue;
9121         TgtIdx = Idx;
9122         break;
9123       }
9124       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9125       // All other current entries will be MEMBER_OF the combined entry
9126       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9127       // 0xFFFF in the MEMBER_OF field).
9128       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9129       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9130     }
9131   }
9132 
9133   /// Generate the base pointers, section pointers, sizes, map types, and
9134   /// mappers associated to a given capture (all included in \a CombinedInfo).
9135   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9136                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9137                               StructRangeInfoTy &PartialStruct) const {
9138     assert(!Cap->capturesVariableArrayType() &&
9139            "Not expecting to generate map info for a variable array type!");
9140 
9141     // We need to know when we generating information for the first component
9142     const ValueDecl *VD = Cap->capturesThis()
9143                               ? nullptr
9144                               : Cap->getCapturedVar()->getCanonicalDecl();
9145 
9146     // for map(to: lambda): skip here, processing it in
9147     // generateDefaultMapInfo
9148     if (LambdasMap.count(VD))
9149       return;
9150 
9151     // If this declaration appears in a is_device_ptr clause we just have to
9152     // pass the pointer by value. If it is a reference to a declaration, we just
9153     // pass its value.
9154     if (DevPointersMap.count(VD)) {
9155       CombinedInfo.Exprs.push_back(VD);
9156       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9157       CombinedInfo.Pointers.push_back(Arg);
9158       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9159           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9160           /*isSigned=*/true));
9161       CombinedInfo.Types.push_back(
9162           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9163           OMP_MAP_TARGET_PARAM);
9164       CombinedInfo.Mappers.push_back(nullptr);
9165       return;
9166     }
9167 
9168     using MapData =
9169         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9170                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9171                    const ValueDecl *, const Expr *>;
9172     SmallVector<MapData, 4> DeclComponentLists;
9173     assert(CurDir.is<const OMPExecutableDirective *>() &&
9174            "Expect a executable directive");
9175     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9176     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9177       const auto *EI = C->getVarRefs().begin();
9178       for (const auto L : C->decl_component_lists(VD)) {
9179         const ValueDecl *VDecl, *Mapper;
9180         // The Expression is not correct if the mapping is implicit
9181         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9182         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9183         std::tie(VDecl, Components, Mapper) = L;
9184         assert(VDecl == VD && "We got information for the wrong declaration??");
9185         assert(!Components.empty() &&
9186                "Not expecting declaration with no component lists.");
9187         DeclComponentLists.emplace_back(Components, C->getMapType(),
9188                                         C->getMapTypeModifiers(),
9189                                         C->isImplicit(), Mapper, E);
9190         ++EI;
9191       }
9192     }
9193     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9194                                              const MapData &RHS) {
9195       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9196       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9197       bool HasPresent =
9198           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9199       bool HasAllocs = MapType == OMPC_MAP_alloc;
9200       MapModifiers = std::get<2>(RHS);
9201       MapType = std::get<1>(LHS);
9202       bool HasPresentR =
9203           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9204       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9205       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9206     });
9207 
9208     // Find overlapping elements (including the offset from the base element).
9209     llvm::SmallDenseMap<
9210         const MapData *,
9211         llvm::SmallVector<
9212             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9213         4>
9214         OverlappedData;
9215     size_t Count = 0;
9216     for (const MapData &L : DeclComponentLists) {
9217       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9218       OpenMPMapClauseKind MapType;
9219       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9220       bool IsImplicit;
9221       const ValueDecl *Mapper;
9222       const Expr *VarRef;
9223       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9224           L;
9225       ++Count;
9226       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9227         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9228         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9229                  VarRef) = L1;
9230         auto CI = Components.rbegin();
9231         auto CE = Components.rend();
9232         auto SI = Components1.rbegin();
9233         auto SE = Components1.rend();
9234         for (; CI != CE && SI != SE; ++CI, ++SI) {
9235           if (CI->getAssociatedExpression()->getStmtClass() !=
9236               SI->getAssociatedExpression()->getStmtClass())
9237             break;
9238           // Are we dealing with different variables/fields?
9239           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9240             break;
9241         }
9242         // Found overlapping if, at least for one component, reached the head
9243         // of the components list.
9244         if (CI == CE || SI == SE) {
9245           // Ignore it if it is the same component.
9246           if (CI == CE && SI == SE)
9247             continue;
9248           const auto It = (SI == SE) ? CI : SI;
9249           // If one component is a pointer and another one is a kind of
9250           // dereference of this pointer (array subscript, section, dereference,
9251           // etc.), it is not an overlapping.
9252           // Same, if one component is a base and another component is a
9253           // dereferenced pointer memberexpr with the same base.
9254           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9255               (std::prev(It)->getAssociatedDeclaration() &&
9256                std::prev(It)
9257                    ->getAssociatedDeclaration()
9258                    ->getType()
9259                    ->isPointerType()) ||
9260               (It->getAssociatedDeclaration() &&
9261                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9262                std::next(It) != CE && std::next(It) != SE))
9263             continue;
9264           const MapData &BaseData = CI == CE ? L : L1;
9265           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9266               SI == SE ? Components : Components1;
9267           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9268           OverlappedElements.getSecond().push_back(SubData);
9269         }
9270       }
9271     }
9272     // Sort the overlapped elements for each item.
9273     llvm::SmallVector<const FieldDecl *, 4> Layout;
9274     if (!OverlappedData.empty()) {
9275       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9276       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9277       while (BaseType != OrigType) {
9278         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9279         OrigType = BaseType->getPointeeOrArrayElementType();
9280       }
9281 
9282       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9283         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9284       else {
9285         const auto *RD = BaseType->getAsRecordDecl();
9286         Layout.append(RD->field_begin(), RD->field_end());
9287       }
9288     }
9289     for (auto &Pair : OverlappedData) {
9290       llvm::stable_sort(
9291           Pair.getSecond(),
9292           [&Layout](
9293               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9294               OMPClauseMappableExprCommon::MappableExprComponentListRef
9295                   Second) {
9296             auto CI = First.rbegin();
9297             auto CE = First.rend();
9298             auto SI = Second.rbegin();
9299             auto SE = Second.rend();
9300             for (; CI != CE && SI != SE; ++CI, ++SI) {
9301               if (CI->getAssociatedExpression()->getStmtClass() !=
9302                   SI->getAssociatedExpression()->getStmtClass())
9303                 break;
9304               // Are we dealing with different variables/fields?
9305               if (CI->getAssociatedDeclaration() !=
9306                   SI->getAssociatedDeclaration())
9307                 break;
9308             }
9309 
9310             // Lists contain the same elements.
9311             if (CI == CE && SI == SE)
9312               return false;
9313 
9314             // List with less elements is less than list with more elements.
9315             if (CI == CE || SI == SE)
9316               return CI == CE;
9317 
9318             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9319             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9320             if (FD1->getParent() == FD2->getParent())
9321               return FD1->getFieldIndex() < FD2->getFieldIndex();
9322             const auto *It =
9323                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9324                   return FD == FD1 || FD == FD2;
9325                 });
9326             return *It == FD1;
9327           });
9328     }
9329 
9330     // Associated with a capture, because the mapping flags depend on it.
9331     // Go through all of the elements with the overlapped elements.
9332     bool IsFirstComponentList = true;
9333     for (const auto &Pair : OverlappedData) {
9334       const MapData &L = *Pair.getFirst();
9335       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9336       OpenMPMapClauseKind MapType;
9337       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9338       bool IsImplicit;
9339       const ValueDecl *Mapper;
9340       const Expr *VarRef;
9341       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9342           L;
9343       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9344           OverlappedComponents = Pair.getSecond();
9345       generateInfoForComponentList(
9346           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9347           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9348           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9349       IsFirstComponentList = false;
9350     }
9351     // Go through other elements without overlapped elements.
9352     for (const MapData &L : DeclComponentLists) {
9353       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9354       OpenMPMapClauseKind MapType;
9355       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9356       bool IsImplicit;
9357       const ValueDecl *Mapper;
9358       const Expr *VarRef;
9359       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9360           L;
9361       auto It = OverlappedData.find(&L);
9362       if (It == OverlappedData.end())
9363         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9364                                      Components, CombinedInfo, PartialStruct,
9365                                      IsFirstComponentList, IsImplicit, Mapper,
9366                                      /*ForDeviceAddr=*/false, VD, VarRef);
9367       IsFirstComponentList = false;
9368     }
9369   }
9370 
9371   /// Generate the default map information for a given capture \a CI,
9372   /// record field declaration \a RI and captured value \a CV.
9373   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9374                               const FieldDecl &RI, llvm::Value *CV,
9375                               MapCombinedInfoTy &CombinedInfo) const {
9376     bool IsImplicit = true;
9377     // Do the default mapping.
9378     if (CI.capturesThis()) {
9379       CombinedInfo.Exprs.push_back(nullptr);
9380       CombinedInfo.BasePointers.push_back(CV);
9381       CombinedInfo.Pointers.push_back(CV);
9382       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9383       CombinedInfo.Sizes.push_back(
9384           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9385                                     CGF.Int64Ty, /*isSigned=*/true));
9386       // Default map type.
9387       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9388     } else if (CI.capturesVariableByCopy()) {
9389       const VarDecl *VD = CI.getCapturedVar();
9390       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9391       CombinedInfo.BasePointers.push_back(CV);
9392       CombinedInfo.Pointers.push_back(CV);
9393       if (!RI.getType()->isAnyPointerType()) {
9394         // We have to signal to the runtime captures passed by value that are
9395         // not pointers.
9396         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9397         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9398             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9399       } else {
9400         // Pointers are implicitly mapped with a zero size and no flags
9401         // (other than first map that is added for all implicit maps).
9402         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9403         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9404       }
9405       auto I = FirstPrivateDecls.find(VD);
9406       if (I != FirstPrivateDecls.end())
9407         IsImplicit = I->getSecond();
9408     } else {
9409       assert(CI.capturesVariable() && "Expected captured reference.");
9410       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9411       QualType ElementType = PtrTy->getPointeeType();
9412       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9413           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9414       // The default map type for a scalar/complex type is 'to' because by
9415       // default the value doesn't have to be retrieved. For an aggregate
9416       // type, the default is 'tofrom'.
9417       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9418       const VarDecl *VD = CI.getCapturedVar();
9419       auto I = FirstPrivateDecls.find(VD);
9420       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9421       CombinedInfo.BasePointers.push_back(CV);
9422       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9423         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9424             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9425             AlignmentSource::Decl));
9426         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9427       } else {
9428         CombinedInfo.Pointers.push_back(CV);
9429       }
9430       if (I != FirstPrivateDecls.end())
9431         IsImplicit = I->getSecond();
9432     }
9433     // Every default map produces a single argument which is a target parameter.
9434     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9435 
9436     // Add flag stating this is an implicit map.
9437     if (IsImplicit)
9438       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9439 
9440     // No user-defined mapper for default mapping.
9441     CombinedInfo.Mappers.push_back(nullptr);
9442   }
9443 };
9444 } // anonymous namespace
9445 
9446 static void emitNonContiguousDescriptor(
9447     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9448     CGOpenMPRuntime::TargetDataInfo &Info) {
9449   CodeGenModule &CGM = CGF.CGM;
9450   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9451       &NonContigInfo = CombinedInfo.NonContigInfo;
9452 
9453   // Build an array of struct descriptor_dim and then assign it to
9454   // offload_args.
9455   //
9456   // struct descriptor_dim {
9457   //  uint64_t offset;
9458   //  uint64_t count;
9459   //  uint64_t stride
9460   // };
9461   ASTContext &C = CGF.getContext();
9462   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9463   RecordDecl *RD;
9464   RD = C.buildImplicitRecord("descriptor_dim");
9465   RD->startDefinition();
9466   addFieldToRecordDecl(C, RD, Int64Ty);
9467   addFieldToRecordDecl(C, RD, Int64Ty);
9468   addFieldToRecordDecl(C, RD, Int64Ty);
9469   RD->completeDefinition();
9470   QualType DimTy = C.getRecordType(RD);
9471 
9472   enum { OffsetFD = 0, CountFD, StrideFD };
9473   // We need two index variable here since the size of "Dims" is the same as the
9474   // size of Components, however, the size of offset, count, and stride is equal
9475   // to the size of base declaration that is non-contiguous.
9476   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9477     // Skip emitting ir if dimension size is 1 since it cannot be
9478     // non-contiguous.
9479     if (NonContigInfo.Dims[I] == 1)
9480       continue;
9481     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9482     QualType ArrayTy =
9483         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9484     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9485     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9486       unsigned RevIdx = EE - II - 1;
9487       LValue DimsLVal = CGF.MakeAddrLValue(
9488           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9489       // Offset
9490       LValue OffsetLVal = CGF.EmitLValueForField(
9491           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9492       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9493       // Count
9494       LValue CountLVal = CGF.EmitLValueForField(
9495           DimsLVal, *std::next(RD->field_begin(), CountFD));
9496       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9497       // Stride
9498       LValue StrideLVal = CGF.EmitLValueForField(
9499           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9500       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9501     }
9502     // args[I] = &dims
9503     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9504         DimsAddr, CGM.Int8PtrTy);
9505     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9506         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9507         Info.PointersArray, 0, I);
9508     Address PAddr(P, CGF.getPointerAlign());
9509     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9510     ++L;
9511   }
9512 }
9513 
9514 // Try to extract the base declaration from a `this->x` expression if possible.
9515 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9516   if (!E)
9517     return nullptr;
9518 
9519   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9520     if (const MemberExpr *ME =
9521             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9522       return ME->getMemberDecl();
9523   return nullptr;
9524 }
9525 
9526 /// Emit a string constant containing the names of the values mapped to the
9527 /// offloading runtime library.
9528 llvm::Constant *
9529 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9530                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9531 
9532   uint32_t SrcLocStrSize;
9533   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9534     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9535 
9536   SourceLocation Loc;
9537   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9538     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9539       Loc = VD->getLocation();
9540     else
9541       Loc = MapExprs.getMapExpr()->getExprLoc();
9542   } else {
9543     Loc = MapExprs.getMapDecl()->getLocation();
9544   }
9545 
9546   std::string ExprName;
9547   if (MapExprs.getMapExpr()) {
9548     PrintingPolicy P(CGF.getContext().getLangOpts());
9549     llvm::raw_string_ostream OS(ExprName);
9550     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9551     OS.flush();
9552   } else {
9553     ExprName = MapExprs.getMapDecl()->getNameAsString();
9554   }
9555 
9556   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9557   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9558                                          PLoc.getLine(), PLoc.getColumn(),
9559                                          SrcLocStrSize);
9560 }
9561 
9562 /// Emit the arrays used to pass the captures and map information to the
9563 /// offloading runtime library. If there is no map or capture information,
9564 /// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries for the base-pointer, pointer, and mapper arrays;
    // they are filled element by element in the per-pointer loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For a non-contiguous entry the size slot carries the dimension
        // count (NonContigInfo.Dims) rather than the size value.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    // Copy the map-type flag values into plain uint64_t storage for the IR
    // builder.
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Emit one source-location/name string constant per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base-pointer, pointer, (runtime-evaluated) size, and mapper
    // array slots for each mapped entry.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the base-pointer slot address for entries carrying a device
      // pointer declaration so it can be looked up later by the caller.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored at runtime when at least one of them is not a
      // compile-time constant; otherwise the constant global above is used.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Emit the per-dimension descriptors only when there is non-contiguous map
  // information to describe.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9736 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// True when emitting the arguments for the end-of-region runtime call.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9745 
9746 /// Emit the arguments to be passed to the runtime library based on the
9747 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9748 /// ForEndCall, emit map types to be passed for the end of the region instead of
9749 /// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array in Info to a pointer to its first element
    // (&array[0][0]) for passing to the runtime.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end-of-region call, use the dedicated end map-type array when
    // one was generated; otherwise fall back to the begin array.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No mapped entries: every array argument becomes a typed null pointer.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9806 
9807 /// Check for inner distribute directive.
9808 static const OMPExecutableDirective *
9809 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9810   const auto *CS = D.getInnermostCapturedStmt();
9811   const auto *Body =
9812       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9813   const Stmt *ChildStmt =
9814       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9815 
9816   if (const auto *NestedDir =
9817           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9818     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9819     switch (D.getDirectiveKind()) {
9820     case OMPD_target:
9821       if (isOpenMPDistributeDirective(DKind))
9822         return NestedDir;
9823       if (DKind == OMPD_teams) {
9824         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9825             /*IgnoreCaptured=*/true);
9826         if (!Body)
9827           return nullptr;
9828         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9829         if (const auto *NND =
9830                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9831           DKind = NND->getDirectiveKind();
9832           if (isOpenMPDistributeDirective(DKind))
9833             return NND;
9834         }
9835       }
9836       return nullptr;
9837     case OMPD_target_teams:
9838       if (isOpenMPDistributeDirective(DKind))
9839         return NestedDir;
9840       return nullptr;
9841     case OMPD_target_parallel:
9842     case OMPD_target_simd:
9843     case OMPD_target_parallel_for:
9844     case OMPD_target_parallel_for_simd:
9845       return nullptr;
9846     case OMPD_target_teams_distribute:
9847     case OMPD_target_teams_distribute_simd:
9848     case OMPD_target_teams_distribute_parallel_for:
9849     case OMPD_target_teams_distribute_parallel_for_simd:
9850     case OMPD_parallel:
9851     case OMPD_for:
9852     case OMPD_parallel_for:
9853     case OMPD_parallel_master:
9854     case OMPD_parallel_sections:
9855     case OMPD_for_simd:
9856     case OMPD_parallel_for_simd:
9857     case OMPD_cancel:
9858     case OMPD_cancellation_point:
9859     case OMPD_ordered:
9860     case OMPD_threadprivate:
9861     case OMPD_allocate:
9862     case OMPD_task:
9863     case OMPD_simd:
9864     case OMPD_tile:
9865     case OMPD_unroll:
9866     case OMPD_sections:
9867     case OMPD_section:
9868     case OMPD_single:
9869     case OMPD_master:
9870     case OMPD_critical:
9871     case OMPD_taskyield:
9872     case OMPD_barrier:
9873     case OMPD_taskwait:
9874     case OMPD_taskgroup:
9875     case OMPD_atomic:
9876     case OMPD_flush:
9877     case OMPD_depobj:
9878     case OMPD_scan:
9879     case OMPD_teams:
9880     case OMPD_target_data:
9881     case OMPD_target_exit_data:
9882     case OMPD_target_enter_data:
9883     case OMPD_distribute:
9884     case OMPD_distribute_simd:
9885     case OMPD_distribute_parallel_for:
9886     case OMPD_distribute_parallel_for_simd:
9887     case OMPD_teams_distribute:
9888     case OMPD_teams_distribute_simd:
9889     case OMPD_teams_distribute_parallel_for:
9890     case OMPD_teams_distribute_parallel_for_simd:
9891     case OMPD_target_update:
9892     case OMPD_declare_simd:
9893     case OMPD_declare_variant:
9894     case OMPD_begin_declare_variant:
9895     case OMPD_end_declare_variant:
9896     case OMPD_declare_target:
9897     case OMPD_end_declare_target:
9898     case OMPD_declare_reduction:
9899     case OMPD_declare_mapper:
9900     case OMPD_taskloop:
9901     case OMPD_taskloop_simd:
9902     case OMPD_master_taskloop:
9903     case OMPD_master_taskloop_simd:
9904     case OMPD_parallel_master_taskloop:
9905     case OMPD_parallel_master_taskloop_simd:
9906     case OMPD_requires:
9907     case OMPD_metadirective:
9908     case OMPD_unknown:
9909     default:
9910       llvm_unreachable("Unexpected directive.");
9911     }
9912   }
9913 
9914   return nullptr;
9915 }
9916 
9917 /// Emit the user-defined mapper function. The code generation follows the
9918 /// pattern in the example below.
9919 /// \code
9920 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9921 ///                                           void *base, void *begin,
9922 ///                                           int64_t size, int64_t type,
9923 ///                                           void *name = nullptr) {
9924 ///   // Allocate space for an array section first or add a base/begin for
9925 ///   // pointer dereference.
9926 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9927 ///       !maptype.IsDelete)
9928 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9929 ///                                 size*sizeof(Ty), clearToFromMember(type));
9930 ///   // Map members.
9931 ///   for (unsigned i = 0; i < size; i++) {
9932 ///     // For each component specified by this mapper:
9933 ///     for (auto c : begin[i]->all_components) {
9934 ///       if (c.hasMapper())
9935 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9936 ///                       c.arg_type, c.arg_name);
9937 ///       else
9938 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9939 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9940 ///                                     c.arg_name);
9941 ///     }
9942 ///   }
9943 ///   // Delete the array section.
9944 ///   if (size > 1 && maptype.IsDelete)
9945 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9946 ///                                 size*sizeof(Ty), clearToFromMember(type));
9947 /// }
9948 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each mapper function at most once; UDMMap caches the result.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // Restrict-qualified pointer to the mapped type, used to iterate over the
  // elements of the mapped array section.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // Variable declared by the 'declare mapper' directive; it is privatized
  // below so the mapper's map clauses see the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the pseudocode in the comment above: (handle, base, begin, size, type,
  // name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function "<prefix>omp_mapper.<mangled type>.<mapper name>" so
  // distinct mappers for the same type do not collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI holding the pointer to the element currently being mapped; the second
  // incoming value (the incremented pointer) is added at the loop latch below.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF field position so member components are
  // attributed to the right parent entry.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible map types; the tofrom case (the ToElseBB edge)
    // keeps MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function and, when emitted while generating another
  // function, record the association with that function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10197 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // An array section is present when more than one element is mapped.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialization applies to an array section OR a pointer dereference
    // (base != begin with the PTR_AND_OBJ bit set), and only when the delete
    // bit is NOT set.
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion applies only to array sections, and only when the delete bit
    // IS set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10266 
10267 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10268     const OMPDeclareMapperDecl *D) {
10269   auto I = UDMMap.find(D);
10270   if (I != UDMMap.end())
10271     return I->second;
10272   emitUserDefinedMapper(D);
10273   return UDMMap.lookup(D);
10274 }
10275 
10276 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10277     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10278     llvm::Value *DeviceID,
10279     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10280                                      const OMPLoopDirective &D)>
10281         SizeEmitter) {
10282   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10283   const OMPExecutableDirective *TD = &D;
10284   // Get nested teams distribute kind directive, if any.
10285   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10286     TD = getNestedDistributeDirective(CGM.getContext(), D);
10287   if (!TD)
10288     return;
10289   const auto *LD = cast<OMPLoopDirective>(TD);
10290   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10291                                                          PrePostActionTy &) {
10292     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10293       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10294       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10295       CGF.EmitRuntimeCall(
10296           OMPBuilder.getOrCreateRuntimeFunction(
10297               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10298           Args);
10299     }
10300   };
10301   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10302 }
10303 
10304 void CGOpenMPRuntime::emitTargetCall(
10305     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10306     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10307     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10308     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10309                                      const OMPLoopDirective &D)>
10310         SizeEmitter) {
10311   if (!CGF.HaveInsertPoint())
10312     return;
10313 
10314   assert(OutlinedFn && "Invalid outlined function!");
10315 
10316   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10317                                  D.hasClausesOfKind<OMPNowaitClause>();
10318   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10319   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10320   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10321                                             PrePostActionTy &) {
10322     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10323   };
10324   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10325 
10326   CodeGenFunction::OMPTargetDataInfo InputInfo;
10327   llvm::Value *MapTypesArray = nullptr;
10328   llvm::Value *MapNamesArray = nullptr;
10329   // Fill up the pointer arrays and transfer execution to the device.
10330   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10331                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10332                     &CapturedVars,
10333                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10334     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10335       // Reverse offloading is not supported, so just execute on the host.
10336       if (RequiresOuterTask) {
10337         CapturedVars.clear();
10338         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10339       }
10340       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10341       return;
10342     }
10343 
10344     // On top of the arrays that were filled up, the target offloading call
10345     // takes as arguments the device id as well as the host pointer. The host
10346     // pointer is used by the runtime library to identify the current target
10347     // region, so it only has to be unique and not necessarily point to
10348     // anything. It could be the pointer to the outlined function that
10349     // implements the target region, but we aren't using that so that the
10350     // compiler doesn't need to keep that, and could therefore inline the host
10351     // function if proven worthwhile during optimization.
10352 
10353     // From this point on, we need to have an ID of the target region defined.
10354     assert(OutlinedFnID && "Invalid outlined function ID!");
10355 
10356     // Emit device ID if any.
10357     llvm::Value *DeviceID;
10358     if (Device.getPointer()) {
10359       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10360               Device.getInt() == OMPC_DEVICE_device_num) &&
10361              "Expected device_num modifier.");
10362       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10363       DeviceID =
10364           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10365     } else {
10366       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10367     }
10368 
10369     // Emit the number of elements in the offloading arrays.
10370     llvm::Value *PointerNum =
10371         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10372 
10373     // Return value of the runtime offloading call.
10374     llvm::Value *Return;
10375 
10376     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10377     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10378 
10379     // Source location for the ident struct
10380     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10381 
10382     // Emit tripcount for the target loop-based directive.
10383     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10384 
10385     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10386     // The target region is an outlined function launched by the runtime
10387     // via calls __tgt_target() or __tgt_target_teams().
10388     //
10389     // __tgt_target() launches a target region with one team and one thread,
10390     // executing a serial region.  This master thread may in turn launch
10391     // more threads within its team upon encountering a parallel region,
10392     // however, no additional teams can be launched on the device.
10393     //
10394     // __tgt_target_teams() launches a target region with one or more teams,
10395     // each with one or more threads.  This call is required for target
10396     // constructs such as:
10397     //  'target teams'
10398     //  'target' / 'teams'
10399     //  'target teams distribute parallel for'
10400     //  'target parallel'
10401     // and so on.
10402     //
10403     // Note that on the host and CPU targets, the runtime implementation of
10404     // these calls simply call the outlined function without forking threads.
10405     // The outlined functions themselves have runtime calls to
10406     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10407     // the compiler in emitTeamsCall() and emitParallelCall().
10408     //
10409     // In contrast, on the NVPTX target, the implementation of
10410     // __tgt_target_teams() launches a GPU kernel with the requested number
10411     // of teams and threads so no additional calls to the runtime are required.
10412     if (NumTeams) {
10413       // If we have NumTeams defined this means that we have an enclosed teams
10414       // region. Therefore we also expect to have NumThreads defined. These two
10415       // values should be defined in the presence of a teams directive,
10416       // regardless of having any clauses associated. If the user is using teams
10417       // but no clauses, these two values will be the default that should be
10418       // passed to the runtime library - a 32-bit integer with the value zero.
10419       assert(NumThreads && "Thread limit expression should be available along "
10420                            "with number of teams.");
10421       SmallVector<llvm::Value *> OffloadingArgs = {
10422           RTLoc,
10423           DeviceID,
10424           OutlinedFnID,
10425           PointerNum,
10426           InputInfo.BasePointersArray.getPointer(),
10427           InputInfo.PointersArray.getPointer(),
10428           InputInfo.SizesArray.getPointer(),
10429           MapTypesArray,
10430           MapNamesArray,
10431           InputInfo.MappersArray.getPointer(),
10432           NumTeams,
10433           NumThreads};
10434       if (HasNowait) {
10435         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10436         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10437         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10438         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10439         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10440         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10441       }
10442       Return = CGF.EmitRuntimeCall(
10443           OMPBuilder.getOrCreateRuntimeFunction(
10444               CGM.getModule(), HasNowait
10445                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10446                                    : OMPRTL___tgt_target_teams_mapper),
10447           OffloadingArgs);
10448     } else {
10449       SmallVector<llvm::Value *> OffloadingArgs = {
10450           RTLoc,
10451           DeviceID,
10452           OutlinedFnID,
10453           PointerNum,
10454           InputInfo.BasePointersArray.getPointer(),
10455           InputInfo.PointersArray.getPointer(),
10456           InputInfo.SizesArray.getPointer(),
10457           MapTypesArray,
10458           MapNamesArray,
10459           InputInfo.MappersArray.getPointer()};
10460       if (HasNowait) {
10461         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10462         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10463         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10464         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10465         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10466         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10467       }
10468       Return = CGF.EmitRuntimeCall(
10469           OMPBuilder.getOrCreateRuntimeFunction(
10470               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10471                                          : OMPRTL___tgt_target_mapper),
10472           OffloadingArgs);
10473     }
10474 
10475     // Check the error code and execute the host version if required.
10476     llvm::BasicBlock *OffloadFailedBlock =
10477         CGF.createBasicBlock("omp_offload.failed");
10478     llvm::BasicBlock *OffloadContBlock =
10479         CGF.createBasicBlock("omp_offload.cont");
10480     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10481     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10482 
10483     CGF.EmitBlock(OffloadFailedBlock);
10484     if (RequiresOuterTask) {
10485       CapturedVars.clear();
10486       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10487     }
10488     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10489     CGF.EmitBranch(OffloadContBlock);
10490 
10491     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10492   };
10493 
10494   // Notify that the host version must be executed.
10495   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10496                     RequiresOuterTask](CodeGenFunction &CGF,
10497                                        PrePostActionTy &) {
10498     if (RequiresOuterTask) {
10499       CapturedVars.clear();
10500       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10501     }
10502     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10503   };
10504 
10505   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10506                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10507                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10508     // Fill up the arrays with all the captured variables.
10509     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10510 
10511     // Get mappable expression information.
10512     MappableExprsHandler MEHandler(D, CGF);
10513     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10514     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10515 
10516     auto RI = CS.getCapturedRecordDecl()->field_begin();
10517     auto *CV = CapturedVars.begin();
10518     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10519                                               CE = CS.capture_end();
10520          CI != CE; ++CI, ++RI, ++CV) {
10521       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10522       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10523 
10524       // VLA sizes are passed to the outlined region by copy and do not have map
10525       // information associated.
10526       if (CI->capturesVariableArrayType()) {
10527         CurInfo.Exprs.push_back(nullptr);
10528         CurInfo.BasePointers.push_back(*CV);
10529         CurInfo.Pointers.push_back(*CV);
10530         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10531             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10532         // Copy to the device as an argument. No need to retrieve it.
10533         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10534                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10535                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10536         CurInfo.Mappers.push_back(nullptr);
10537       } else {
10538         // If we have any information in the map clause, we use it, otherwise we
10539         // just do a default mapping.
10540         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10541         if (!CI->capturesThis())
10542           MappedVarSet.insert(CI->getCapturedVar());
10543         else
10544           MappedVarSet.insert(nullptr);
10545         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10546           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10547         // Generate correct mapping for variables captured by reference in
10548         // lambdas.
10549         if (CI->capturesVariable())
10550           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10551                                                   CurInfo, LambdaPointers);
10552       }
10553       // We expect to have at least an element of information for this capture.
10554       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10555              "Non-existing map pointer for capture!");
10556       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10557              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10558              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10559              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10560              "Inconsistent map information sizes!");
10561 
10562       // If there is an entry in PartialStruct it means we have a struct with
10563       // individual members mapped. Emit an extra combined entry.
10564       if (PartialStruct.Base.isValid()) {
10565         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10566         MEHandler.emitCombinedEntry(
10567             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10568             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10569       }
10570 
10571       // We need to append the results of this capture to what we already have.
10572       CombinedInfo.append(CurInfo);
10573     }
10574     // Adjust MEMBER_OF flags for the lambdas captures.
10575     MEHandler.adjustMemberOfForLambdaCaptures(
10576         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10577         CombinedInfo.Types);
10578     // Map any list items in a map clause that were not captures because they
10579     // weren't referenced within the construct.
10580     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10581 
10582     TargetDataInfo Info;
10583     // Fill up the arrays and create the arguments.
10584     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10585     emitOffloadingArraysArgument(
10586         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10587         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10588         {/*ForEndTask=*/false});
10589 
10590     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10591     InputInfo.BasePointersArray =
10592         Address(Info.BasePointersArray, CGM.getPointerAlign());
10593     InputInfo.PointersArray =
10594         Address(Info.PointersArray, CGM.getPointerAlign());
10595     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10596     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10597     MapTypesArray = Info.MapTypesArray;
10598     MapNamesArray = Info.MapNamesArray;
10599     if (RequiresOuterTask)
10600       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10601     else
10602       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10603   };
10604 
10605   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10606                              CodeGenFunction &CGF, PrePostActionTy &) {
10607     if (RequiresOuterTask) {
10608       CodeGenFunction::OMPTargetDataInfo InputInfo;
10609       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10610     } else {
10611       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10612     }
10613   };
10614 
10615   // If we have a target function ID it means that we need to support
10616   // offloading, otherwise, just execute on the host. We need to execute on host
10617   // regardless of the conditional in the if clause if, e.g., the user do not
10618   // specify target triples.
10619   if (OutlinedFnID) {
10620     if (IfCond) {
10621       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10622     } else {
10623       RegionCodeGenTy ThenRCG(TargetThenGen);
10624       ThenRCG(CGF);
10625     }
10626   } else {
10627     RegionCodeGenTy ElseRCG(TargetElseGen);
10628     ElseRCG(CGF);
10629   }
10630 }
10631 
/// Recursively walk the statement tree rooted at \p S looking for OpenMP
/// target execution directives and emit a device function for each one found.
/// \p ParentName is the mangled name of the enclosing host function; it is
/// used to build the unique offload entry for every target region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device-id, file-id, line) triple uniquely identifies this target
    // region across translation units.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives, so
    // reaching here with one of them means the RequiresDeviceCodegen check
    // above was inconsistent with this switch - treat it as a bug.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // A non-target executable directive: only recurse into the raw associated
  // statement (captured regions are skipped by getRawStmt).
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10782 
10783 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10784   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10785       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10786   if (!DevTy)
10787     return false;
10788   // Do not emit device_type(nohost) functions for the host.
10789   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10790     return true;
10791   // Do not emit device_type(host) functions for the device.
10792   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10793     return true;
10794   return false;
10795 }
10796 
10797 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10798   // If emitting code for the host, we do not process FD here. Instead we do
10799   // the normal code generation.
10800   if (!CGM.getLangOpts().OpenMPIsDevice) {
10801     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10802       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10803                                   CGM.getLangOpts().OpenMPIsDevice))
10804         return true;
10805     return false;
10806   }
10807 
10808   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10809   // Try to detect target regions in the function.
10810   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10811     StringRef Name = CGM.getMangledName(GD);
10812     scanForTargetRegionsFunctions(FD->getBody(), Name);
10813     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10814                                 CGM.getLangOpts().OpenMPIsDevice))
10815       return true;
10816   }
10817 
10818   // Do not to emit function if it is not marked as declare target.
10819   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10820          AlreadyEmittedTargetDecls.count(VD) == 0;
10821 }
10822 
/// Decide whether emission of the global variable \p GD should be skipped
/// (returns true) or should go through regular codegen (returns false).
/// Also scans constructor/destructor bodies for target regions on the device.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Variables excluded from this side by device_type are never emitted.
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // On the host all remaining variables go through regular codegen.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  // 'link' variables (and 'to' variables under unified shared memory) are not
  // emitted directly; defer them so emitDeferredTargetDecls can create the
  // reference/pointer form later.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10860 
/// Register the declare-target variable \p VD (with emitted address \p Addr)
/// in the offload entries table so host and device images can be linked up.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading is requested at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // declare target to: register the variable itself under its own name.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: a zero size tells the entry table there is no
      // definition in this translation unit.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant pointer to the variable and mark it
        // compiler-used so the optimizer cannot drop the variable.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    // declare target link (or 'to' + unified shared memory): register the
    // pointer indirection rather than the variable itself.
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry uses the already-emitted name; a null address
      // tells the registration to look it up by name.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    // The registered entity is a pointer, so it has pointer size and weak
    // linkage (it may be emitted by multiple translation units).
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10942 
10943 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10944   if (isa<FunctionDecl>(GD.getDecl()) ||
10945       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10946     return emitTargetFunctions(GD);
10947 
10948   return emitTargetGlobalVariable(GD);
10949 }
10950 
10951 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10952   for (const VarDecl *VD : DeferredGlobalVariables) {
10953     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10954         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10955     if (!Res)
10956       continue;
10957     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10958         !HasRequiresUnifiedSharedMemory) {
10959       CGM.EmitGlobal(VD);
10960     } else {
10961       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10962               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10963                HasRequiresUnifiedSharedMemory)) &&
10964              "Expected link clause or to clause with unified memory.");
10965       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10966     }
10967   }
10968 }
10969 
10970 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10971     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10972   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10973          " Expected target-based directive.");
10974 }
10975 
10976 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10977   for (const OMPClause *Clause : D->clauselists()) {
10978     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10979       HasRequiresUnifiedSharedMemory = true;
10980     } else if (const auto *AC =
10981                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10982       switch (AC->getAtomicDefaultMemOrderKind()) {
10983       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10984         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10985         break;
10986       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10987         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10988         break;
10989       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10990         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10991         break;
10992       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10993         break;
10994       }
10995     }
10996   }
10997 }
10998 
/// Returns the default atomic ordering, as set by a 'requires
/// atomic_default_mem_order' clause (see processRequiresDirective) or the
/// runtime's initial value if no such clause was seen.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11002 
11003 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11004                                                        LangAS &AS) {
11005   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11006     return false;
11007   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11008   switch(A->getAllocatorType()) {
11009   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11010   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11011   // Not supported, fallback to the default mem space.
11012   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11013   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11014   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11015   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11016   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11017   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11018   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11019     AS = LangAS::Default;
11020     return true;
11021   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11022     llvm_unreachable("Expected predefined allocator for the variables with the "
11023                      "static storage.");
11024   }
11025   return false;
11026 }
11027 
/// Returns whether a '#pragma omp requires unified_shared_memory' directive
/// was seen in this translation unit (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11031 
11032 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11033     CodeGenModule &CGM)
11034     : CGM(CGM) {
11035   if (CGM.getLangOpts().OpenMPIsDevice) {
11036     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11037     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11038   }
11039 }
11040 
11041 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11042   if (CGM.getLangOpts().OpenMPIsDevice)
11043     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11044 }
11045 
/// Returns true if \p GD does not need to be (re-)emitted for the device,
/// false if emission should proceed. Also records first-time emissions of
/// non-declare-target functions in AlreadyEmittedTargetDecls.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Only relevant for device compilation with auto-marking enabled (see
  // DisableAutoDeclareTargetRAII).
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Already fully emitted if the module has a definition (not just a
      // declaration) under this mangled name.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Not declare target: mark it emitted now; a second call for the same decl
  // finds it in the set and skips re-emission.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
11065 
/// Create the global constructor-style function that registers the 'requires'
/// clauses of this translation unit with the offload runtime via
/// __tgt_register_requires. Returns nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Build a void() function named omp_offloading.requires_reg whose body is
    // a single call into the offload runtime.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11107 
11108 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11109                                     const OMPExecutableDirective &D,
11110                                     SourceLocation Loc,
11111                                     llvm::Function *OutlinedFn,
11112                                     ArrayRef<llvm::Value *> CapturedVars) {
11113   if (!CGF.HaveInsertPoint())
11114     return;
11115 
11116   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11117   CodeGenFunction::RunCleanupsScope Scope(CGF);
11118 
11119   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11120   llvm::Value *Args[] = {
11121       RTLoc,
11122       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11123       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11124   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11125   RealArgs.append(std::begin(Args), std::end(Args));
11126   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11127 
11128   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11129       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11130   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11131 }
11132 
11133 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11134                                          const Expr *NumTeams,
11135                                          const Expr *ThreadLimit,
11136                                          SourceLocation Loc) {
11137   if (!CGF.HaveInsertPoint())
11138     return;
11139 
11140   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11141 
11142   llvm::Value *NumTeamsVal =
11143       NumTeams
11144           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11145                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11146           : CGF.Builder.getInt32(0);
11147 
11148   llvm::Value *ThreadLimitVal =
11149       ThreadLimit
11150           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11151                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11152           : CGF.Builder.getInt32(0);
11153 
11154   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11155   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11156                                      ThreadLimitVal};
11157   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11158                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11159                       PushNumTeamsArgs);
11160 }
11161 
11162 void CGOpenMPRuntime::emitTargetDataCalls(
11163     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11164     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11165   if (!CGF.HaveInsertPoint())
11166     return;
11167 
11168   // Action used to replace the default codegen action and turn privatization
11169   // off.
11170   PrePostActionTy NoPrivAction;
11171 
11172   // Generate the code for the opening of the data environment. Capture all the
11173   // arguments of the runtime call by reference because they are used in the
11174   // closing of the region.
11175   auto &&BeginThenGen = [this, &D, Device, &Info,
11176                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11177     // Fill up the arrays with all the mapped variables.
11178     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11179 
11180     // Get map clause information.
11181     MappableExprsHandler MEHandler(D, CGF);
11182     MEHandler.generateAllInfo(CombinedInfo);
11183 
11184     // Fill up the arrays and create the arguments.
11185     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11186                          /*IsNonContiguous=*/true);
11187 
11188     llvm::Value *BasePointersArrayArg = nullptr;
11189     llvm::Value *PointersArrayArg = nullptr;
11190     llvm::Value *SizesArrayArg = nullptr;
11191     llvm::Value *MapTypesArrayArg = nullptr;
11192     llvm::Value *MapNamesArrayArg = nullptr;
11193     llvm::Value *MappersArrayArg = nullptr;
11194     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11195                                  SizesArrayArg, MapTypesArrayArg,
11196                                  MapNamesArrayArg, MappersArrayArg, Info);
11197 
11198     // Emit device ID if any.
11199     llvm::Value *DeviceID = nullptr;
11200     if (Device) {
11201       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11202                                            CGF.Int64Ty, /*isSigned=*/true);
11203     } else {
11204       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11205     }
11206 
11207     // Emit the number of elements in the offloading arrays.
11208     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11209     //
11210     // Source location for the ident struct
11211     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11212 
11213     llvm::Value *OffloadingArgs[] = {RTLoc,
11214                                      DeviceID,
11215                                      PointerNum,
11216                                      BasePointersArrayArg,
11217                                      PointersArrayArg,
11218                                      SizesArrayArg,
11219                                      MapTypesArrayArg,
11220                                      MapNamesArrayArg,
11221                                      MappersArrayArg};
11222     CGF.EmitRuntimeCall(
11223         OMPBuilder.getOrCreateRuntimeFunction(
11224             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11225         OffloadingArgs);
11226 
11227     // If device pointer privatization is required, emit the body of the region
11228     // here. It will have to be duplicated: with and without privatization.
11229     if (!Info.CaptureDeviceAddrMap.empty())
11230       CodeGen(CGF);
11231   };
11232 
11233   // Generate code for the closing of the data region.
11234   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11235                                                 PrePostActionTy &) {
11236     assert(Info.isValid() && "Invalid data environment closing arguments.");
11237 
11238     llvm::Value *BasePointersArrayArg = nullptr;
11239     llvm::Value *PointersArrayArg = nullptr;
11240     llvm::Value *SizesArrayArg = nullptr;
11241     llvm::Value *MapTypesArrayArg = nullptr;
11242     llvm::Value *MapNamesArrayArg = nullptr;
11243     llvm::Value *MappersArrayArg = nullptr;
11244     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11245                                  SizesArrayArg, MapTypesArrayArg,
11246                                  MapNamesArrayArg, MappersArrayArg, Info,
11247                                  {/*ForEndCall=*/true});
11248 
11249     // Emit device ID if any.
11250     llvm::Value *DeviceID = nullptr;
11251     if (Device) {
11252       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11253                                            CGF.Int64Ty, /*isSigned=*/true);
11254     } else {
11255       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11256     }
11257 
11258     // Emit the number of elements in the offloading arrays.
11259     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11260 
11261     // Source location for the ident struct
11262     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11263 
11264     llvm::Value *OffloadingArgs[] = {RTLoc,
11265                                      DeviceID,
11266                                      PointerNum,
11267                                      BasePointersArrayArg,
11268                                      PointersArrayArg,
11269                                      SizesArrayArg,
11270                                      MapTypesArrayArg,
11271                                      MapNamesArrayArg,
11272                                      MappersArrayArg};
11273     CGF.EmitRuntimeCall(
11274         OMPBuilder.getOrCreateRuntimeFunction(
11275             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11276         OffloadingArgs);
11277   };
11278 
11279   // If we need device pointer privatization, we need to emit the body of the
11280   // region with no privatization in the 'else' branch of the conditional.
11281   // Otherwise, we don't have to do anything.
11282   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11283                                                          PrePostActionTy &) {
11284     if (!Info.CaptureDeviceAddrMap.empty()) {
11285       CodeGen.setAction(NoPrivAction);
11286       CodeGen(CGF);
11287     }
11288   };
11289 
11290   // We don't have to do anything to close the region if the if clause evaluates
11291   // to false.
11292   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11293 
11294   if (IfCond) {
11295     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11296   } else {
11297     RegionCodeGenTy RCG(BeginThenGen);
11298     RCG(CGF);
11299   }
11300 
11301   // If we don't require privatization of device pointers, we emit the body in
11302   // between the runtime calls. This avoids duplicating the body code.
11303   if (Info.CaptureDeviceAddrMap.empty()) {
11304     CodeGen.setAction(NoPrivAction);
11305     CodeGen(CGF);
11306   }
11307 
11308   if (IfCond) {
11309     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11310   } else {
11311     RegionCodeGenTy RCG(EndThenGen);
11312     RCG(CGF);
11313   }
11314 }
11315 
/// Emits the offloading runtime call for a standalone target data directive:
/// 'target enter data', 'target exit data' or 'target update'. If an 'if'
/// clause is present the call is guarded; if 'depend' or 'nowait' clauses are
/// present the call is wrapped in an outer task.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Nothing to emit if the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the runtime call for the directive. InputInfo, MapTypesArray and
  // MapNamesArray are filled in by TargetThenGen (below) before this lambda
  // is invoked.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive. The '_nowait' variants are used when a 'nowait' clause is
    // present.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assert at the top of
    // this function; they are listed explicitly so that adding a new
    // directive kind triggers a -Wswitch warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays from the map clauses and then emit ThenGen,
  // either inlined or wrapped in a task (for 'depend'/'nowait').
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the array addresses for ThenGen (captured by reference there).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // If the 'if' clause evaluates to false the directive is a no-op.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11495 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// LinearWithVarStride: linear with a stride given by another parameter.
  /// Linear: linear with a constant stride.
  /// Uniform: same value across all SIMD lanes.
  /// Vector: varies per lane (the default classification).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// OpenMP classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// Constant stride for Linear, or the position of the stride parameter
    /// for LinearWithVarStride.
    llvm::APSInt StrideOrArg;
    /// Alignment from an 'aligned' clause; zero when not specified.
    llvm::APSInt Alignment;
  };
} // namespace
11506 
11507 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11508                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11509   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11510   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11511   // of that clause. The VLEN value must be power of 2.
11512   // In other case the notion of the function`s "characteristic data type" (CDT)
11513   // is used to compute the vector length.
11514   // CDT is defined in the following order:
11515   //   a) For non-void function, the CDT is the return type.
11516   //   b) If the function has any non-uniform, non-linear parameters, then the
11517   //   CDT is the type of the first such parameter.
11518   //   c) If the CDT determined by a) or b) above is struct, union, or class
11519   //   type which is pass-by-value (except for the type that maps to the
11520   //   built-in complex data type), the characteristic data type is int.
11521   //   d) If none of the above three cases is applicable, the CDT is int.
11522   // The VLEN is then determined based on the CDT and the size of vector
11523   // register of that ISA for which current vector version is generated. The
11524   // VLEN is computed using the formula below:
11525   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11526   // where vector register size specified in section 3.2.1 Registers and the
11527   // Stack Frame of original AMD64 ABI document.
11528   QualType RetType = FD->getReturnType();
11529   if (RetType.isNull())
11530     return 0;
11531   ASTContext &C = FD->getASTContext();
11532   QualType CDT;
11533   if (!RetType.isNull() && !RetType->isVoidType()) {
11534     CDT = RetType;
11535   } else {
11536     unsigned Offset = 0;
11537     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11538       if (ParamAttrs[Offset].Kind == Vector)
11539         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11540       ++Offset;
11541     }
11542     if (CDT.isNull()) {
11543       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11544         if (ParamAttrs[I + Offset].Kind == Vector) {
11545           CDT = FD->getParamDecl(I)->getType();
11546           break;
11547         }
11548       }
11549     }
11550   }
11551   if (CDT.isNull())
11552     CDT = C.IntTy;
11553   CDT = CDT->getCanonicalTypeUnqualified();
11554   if (CDT->isRecordType() || CDT->isUnionType())
11555     CDT = C.IntTy;
11556   return C.getTypeSize(CDT);
11557 }
11558 
11559 static void
11560 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11561                            const llvm::APSInt &VLENVal,
11562                            ArrayRef<ParamAttrTy> ParamAttrs,
11563                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11564   struct ISADataTy {
11565     char ISA;
11566     unsigned VecRegSize;
11567   };
11568   ISADataTy ISAData[] = {
11569       {
11570           'b', 128
11571       }, // SSE
11572       {
11573           'c', 256
11574       }, // AVX
11575       {
11576           'd', 256
11577       }, // AVX2
11578       {
11579           'e', 512
11580       }, // AVX512
11581   };
11582   llvm::SmallVector<char, 2> Masked;
11583   switch (State) {
11584   case OMPDeclareSimdDeclAttr::BS_Undefined:
11585     Masked.push_back('N');
11586     Masked.push_back('M');
11587     break;
11588   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11589     Masked.push_back('N');
11590     break;
11591   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11592     Masked.push_back('M');
11593     break;
11594   }
11595   for (char Mask : Masked) {
11596     for (const ISADataTy &Data : ISAData) {
11597       SmallString<256> Buffer;
11598       llvm::raw_svector_ostream Out(Buffer);
11599       Out << "_ZGV" << Data.ISA << Mask;
11600       if (!VLENVal) {
11601         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11602         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11603         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11604       } else {
11605         Out << VLENVal;
11606       }
11607       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11608         switch (ParamAttr.Kind){
11609         case LinearWithVarStride:
11610           Out << 's' << ParamAttr.StrideOrArg;
11611           break;
11612         case Linear:
11613           Out << 'l';
11614           if (ParamAttr.StrideOrArg != 1)
11615             Out << ParamAttr.StrideOrArg;
11616           break;
11617         case Uniform:
11618           Out << 'u';
11619           break;
11620         case Vector:
11621           Out << 'v';
11622           break;
11623         }
11624         if (!!ParamAttr.Alignment)
11625           Out << 'a' << ParamAttr.Alignment;
11626       }
11627       Out << '_' << Fn->getName();
11628       Fn->addFnAttr(Out.str());
11629     }
11630   }
11631 }
11632 
// These are the functions that are needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11638 
11639 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11640 ///
11641 /// TODO: Need to implement the behavior for reference marked with a
11642 /// var or no linear modifiers (1.b in the section). For this, we
11643 /// need to extend ParamKindTy to support the linear modifiers.
11644 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11645   QT = QT.getCanonicalType();
11646 
11647   if (QT->isVoidType())
11648     return false;
11649 
11650   if (Kind == ParamKindTy::Uniform)
11651     return false;
11652 
11653   if (Kind == ParamKindTy::Linear)
11654     return false;
11655 
11656   // TODO: Handle linear references with modifiers
11657 
11658   if (Kind == ParamKindTy::LinearWithVarStride)
11659     return false;
11660 
11661   return true;
11662 }
11663 
11664 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11665 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11666   QT = QT.getCanonicalType();
11667   unsigned Size = C.getTypeSize(QT);
11668 
11669   // Only scalars and complex within 16 bytes wide set PVB to true.
11670   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11671     return false;
11672 
11673   if (QT->isFloatingType())
11674     return true;
11675 
11676   if (QT->isIntegerType())
11677     return true;
11678 
11679   if (QT->isPointerType())
11680     return true;
11681 
11682   // TODO: Add support for complex types (section 3.1.2, item 2).
11683 
11684   return false;
11685 }
11686 
11687 /// Computes the lane size (LS) of a return type or of an input parameter,
11688 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11689 /// TODO: Add support for references, section 3.2.1, item 1.
11690 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11691   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11692     QualType PTy = QT.getCanonicalType()->getPointeeType();
11693     if (getAArch64PBV(PTy, C))
11694       return C.getTypeSize(PTy);
11695   }
11696   if (getAArch64PBV(QT, C))
11697     return C.getTypeSize(QT);
11698 
11699   return C.getTypeSize(C.getUIntPtrType());
11700 }
11701 
11702 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11703 // signature of the scalar function, as defined in 3.2.2 of the
11704 // AAVFABI.
11705 static std::tuple<unsigned, unsigned, bool>
11706 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11707   QualType RetType = FD->getReturnType().getCanonicalType();
11708 
11709   ASTContext &C = FD->getASTContext();
11710 
11711   bool OutputBecomesInput = false;
11712 
11713   llvm::SmallVector<unsigned, 8> Sizes;
11714   if (!RetType->isVoidType()) {
11715     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11716     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11717       OutputBecomesInput = true;
11718   }
11719   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11720     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11721     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11722   }
11723 
11724   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11725   // The LS of a function parameter / return value can only be a power
11726   // of 2, starting from 8 bits, up to 128.
11727   assert(llvm::all_of(Sizes,
11728                       [](unsigned Size) {
11729                         return Size == 8 || Size == 16 || Size == 32 ||
11730                                Size == 64 || Size == 128;
11731                       }) &&
11732          "Invalid size");
11733 
11734   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11735                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11736                          OutputBecomesInput);
11737 }
11738 
11739 /// Mangle the parameter part of the vector function name according to
11740 /// their OpenMP classification. The mangling function is defined in
11741 /// section 3.5 of the AAVFABI.
11742 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11743   SmallString<256> Buffer;
11744   llvm::raw_svector_ostream Out(Buffer);
11745   for (const auto &ParamAttr : ParamAttrs) {
11746     switch (ParamAttr.Kind) {
11747     case LinearWithVarStride:
11748       Out << "ls" << ParamAttr.StrideOrArg;
11749       break;
11750     case Linear:
11751       Out << 'l';
11752       // Don't print the step value if it is not present or if it is
11753       // equal to 1.
11754       if (ParamAttr.StrideOrArg != 1)
11755         Out << ParamAttr.StrideOrArg;
11756       break;
11757     case Uniform:
11758       Out << 'u';
11759       break;
11760     case Vector:
11761       Out << 'v';
11762       break;
11763     }
11764 
11765     if (!!ParamAttr.Alignment)
11766       Out << 'a' << ParamAttr.Alignment;
11767   }
11768 
11769   return std::string(Out.str());
11770 }
11771 
11772 // Function used to add the attribute. The parameter `VLEN` is
11773 // templated to allow the use of "x" when targeting scalable functions
11774 // for SVE.
11775 template <typename T>
11776 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11777                                  char ISA, StringRef ParSeq,
11778                                  StringRef MangledName, bool OutputBecomesInput,
11779                                  llvm::Function *Fn) {
11780   SmallString<256> Buffer;
11781   llvm::raw_svector_ostream Out(Buffer);
11782   Out << Prefix << ISA << LMask << VLEN;
11783   if (OutputBecomesInput)
11784     Out << "v";
11785   Out << ParSeq << "_" << MangledName;
11786   Fn->addFnAttr(Out.str());
11787 }
11788 
11789 // Helper function to generate the Advanced SIMD names depending on
11790 // the value of the NDS when simdlen is not present.
11791 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11792                                       StringRef Prefix, char ISA,
11793                                       StringRef ParSeq, StringRef MangledName,
11794                                       bool OutputBecomesInput,
11795                                       llvm::Function *Fn) {
11796   switch (NDS) {
11797   case 8:
11798     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11799                          OutputBecomesInput, Fn);
11800     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11801                          OutputBecomesInput, Fn);
11802     break;
11803   case 16:
11804     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11805                          OutputBecomesInput, Fn);
11806     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11807                          OutputBecomesInput, Fn);
11808     break;
11809   case 32:
11810     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11811                          OutputBecomesInput, Fn);
11812     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11813                          OutputBecomesInput, Fn);
11814     break;
11815   case 64:
11816   case 128:
11817     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11818                          OutputBecomesInput, Fn);
11819     break;
11820   default:
11821     llvm_unreachable("Scalar type is too wide.");
11822   }
11823 }
11824 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates any user-provided 'simdlen' (UserVLEN) against the target ISA
/// ('n' = Advanced SIMD, 's' = SVE), then attaches the mangled vector-variant
/// names to \p Fn. Invalid simdlen values produce a warning and no attributes.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable ("x") masked variant only.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The VLENs themselves are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11933 
11934 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11935                                               llvm::Function *Fn) {
11936   ASTContext &C = CGM.getContext();
11937   FD = FD->getMostRecentDecl();
11938   // Map params to their positions in function decl.
11939   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11940   if (isa<CXXMethodDecl>(FD))
11941     ParamPositions.try_emplace(FD, 0);
11942   unsigned ParamPos = ParamPositions.size();
11943   for (const ParmVarDecl *P : FD->parameters()) {
11944     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11945     ++ParamPos;
11946   }
11947   while (FD) {
11948     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11949       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11950       // Mark uniform parameters.
11951       for (const Expr *E : Attr->uniforms()) {
11952         E = E->IgnoreParenImpCasts();
11953         unsigned Pos;
11954         if (isa<CXXThisExpr>(E)) {
11955           Pos = ParamPositions[FD];
11956         } else {
11957           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11958                                 ->getCanonicalDecl();
11959           Pos = ParamPositions[PVD];
11960         }
11961         ParamAttrs[Pos].Kind = Uniform;
11962       }
11963       // Get alignment info.
11964       auto NI = Attr->alignments_begin();
11965       for (const Expr *E : Attr->aligneds()) {
11966         E = E->IgnoreParenImpCasts();
11967         unsigned Pos;
11968         QualType ParmTy;
11969         if (isa<CXXThisExpr>(E)) {
11970           Pos = ParamPositions[FD];
11971           ParmTy = E->getType();
11972         } else {
11973           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11974                                 ->getCanonicalDecl();
11975           Pos = ParamPositions[PVD];
11976           ParmTy = PVD->getType();
11977         }
11978         ParamAttrs[Pos].Alignment =
11979             (*NI)
11980                 ? (*NI)->EvaluateKnownConstInt(C)
11981                 : llvm::APSInt::getUnsigned(
11982                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11983                           .getQuantity());
11984         ++NI;
11985       }
11986       // Mark linear parameters.
11987       auto SI = Attr->steps_begin();
11988       auto MI = Attr->modifiers_begin();
11989       for (const Expr *E : Attr->linears()) {
11990         E = E->IgnoreParenImpCasts();
11991         unsigned Pos;
11992         // Rescaling factor needed to compute the linear parameter
11993         // value in the mangled name.
11994         unsigned PtrRescalingFactor = 1;
11995         if (isa<CXXThisExpr>(E)) {
11996           Pos = ParamPositions[FD];
11997         } else {
11998           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11999                                 ->getCanonicalDecl();
12000           Pos = ParamPositions[PVD];
12001           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12002             PtrRescalingFactor = CGM.getContext()
12003                                      .getTypeSizeInChars(P->getPointeeType())
12004                                      .getQuantity();
12005         }
12006         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12007         ParamAttr.Kind = Linear;
12008         // Assuming a stride of 1, for `linear` without modifiers.
12009         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12010         if (*SI) {
12011           Expr::EvalResult Result;
12012           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12013             if (const auto *DRE =
12014                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12015               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12016                 ParamAttr.Kind = LinearWithVarStride;
12017                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12018                     ParamPositions[StridePVD->getCanonicalDecl()]);
12019               }
12020             }
12021           } else {
12022             ParamAttr.StrideOrArg = Result.Val.getInt();
12023           }
12024         }
12025         // If we are using a linear clause on a pointer, we need to
12026         // rescale the value of linear_step with the byte size of the
12027         // pointee type.
12028         if (Linear == ParamAttr.Kind)
12029           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12030         ++SI;
12031         ++MI;
12032       }
12033       llvm::APSInt VLENVal;
12034       SourceLocation ExprLoc;
12035       const Expr *VLENExpr = Attr->getSimdlen();
12036       if (VLENExpr) {
12037         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12038         ExprLoc = VLENExpr->getExprLoc();
12039       }
12040       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12041       if (CGM.getTriple().isX86()) {
12042         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12043       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12044         unsigned VLEN = VLENVal.getExtValue();
12045         StringRef MangledName = Fn->getName();
12046         if (CGM.getTarget().hasFeature("sve"))
12047           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12048                                          MangledName, 's', 128, Fn, ExprLoc);
12049         if (CGM.getTarget().hasFeature("neon"))
12050           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12051                                          MangledName, 'n', 128, Fn, ExprLoc);
12052       }
12053     }
12054     FD = FD->getPreviousDecl();
12055   }
12056 }
12057 
12058 namespace {
12059 /// Cleanup action for doacross support.
12060 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12061 public:
12062   static const int DoacrossFinArgs = 2;
12063 
12064 private:
12065   llvm::FunctionCallee RTLFn;
12066   llvm::Value *Args[DoacrossFinArgs];
12067 
12068 public:
12069   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12070                     ArrayRef<llvm::Value *> CallArgs)
12071       : RTLFn(RTLFn) {
12072     assert(CallArgs.size() == DoacrossFinArgs);
12073     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12074   }
12075   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12076     if (!CGF.HaveInsertPoint())
12077       return;
12078     CGF.EmitRuntimeCall(RTLFn, Args);
12079   }
12080 };
12081 } // namespace
12082 
/// Emit the prologue for a doacross loop nest: build a local array of
/// 'kmp_dim' descriptors (one per loop in \p NumIterations), pass it to
/// __kmpc_doacross_init, and push a cleanup that calls __kmpc_doacross_fini
/// when the region is left.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The descriptor record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array; in particular 'lo' stays 0 for every
  // dimension, so only 'up' and 'st' are stored explicitly below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 as required by the runtime.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Arrange for __kmpc_doacross_fini(loc, gtid) to run on region exit,
  // including the exception path (NormalAndEHCleanup).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12153 
12154 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12155                                           const OMPDependClause *C) {
12156   QualType Int64Ty =
12157       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12158   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12159   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12160       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12161   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12162   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12163     const Expr *CounterVal = C->getLoopData(I);
12164     assert(CounterVal);
12165     llvm::Value *CntVal = CGF.EmitScalarConversion(
12166         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12167         CounterVal->getExprLoc());
12168     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12169                           /*Volatile=*/false, Int64Ty);
12170   }
12171   llvm::Value *Args[] = {
12172       emitUpdateLocation(CGF, C->getBeginLoc()),
12173       getThreadID(CGF, C->getBeginLoc()),
12174       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12175   llvm::FunctionCallee RTLFn;
12176   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12177     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12178                                                   OMPRTL___kmpc_doacross_post);
12179   } else {
12180     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12181     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12182                                                   OMPRTL___kmpc_doacross_wait);
12183   }
12184   CGF.EmitRuntimeCall(RTLFn, Args);
12185 }
12186 
12187 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12188                                llvm::FunctionCallee Callee,
12189                                ArrayRef<llvm::Value *> Args) const {
12190   assert(Loc.isValid() && "Outlined function call location must be valid.");
12191   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12192 
12193   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12194     if (Fn->doesNotThrow()) {
12195       CGF.EmitNounwindRuntimeCall(Fn, Args);
12196       return;
12197     }
12198   }
12199   CGF.EmitRuntimeCall(Callee, Args);
12200 }
12201 
/// Emit a call to an outlined OpenMP region function. Thin wrapper over
/// emitCall, kept as a separate entry point for outlined-function calls.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12207 
12208 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12209   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12210     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12211       HasEmittedDeclareTargetRegion = true;
12212 }
12213 
/// Map a native parameter to its address. In this base implementation the
/// native parameter is used as-is, so its local address is returned and
/// \p TargetParam is ignored.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12219 
/// Return the address to use for local variable \p VD, taking into account
/// untied-task local storage and the 'omp allocate' directive. For
/// allocatable declarations this emits a __kmpc_alloc call and pushes a
/// cleanup that emits the matching __kmpc_free.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, the variable may live in
  // task-local storage recorded on the untied-locals stack.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: the size is only known at runtime; round it up to the alignment.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *<name>.void.addr = __kmpc_alloc(gtid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    // Cast the raw allocation to a pointer to the variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Source location stored as its raw encoding so the cleanup needs no
      // AST context when it runs.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, addr, allocator);
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // For untied tasks free the real (task-local) address, not the copy.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12323 
12324 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12325                                              const VarDecl *VD) const {
12326   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12327   if (It == FunctionToUntiedTaskStackMap.end())
12328     return false;
12329   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12330 }
12331 
12332 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12333     CodeGenModule &CGM, const OMPLoopDirective &S)
12334     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12335   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12336   if (!NeedToPush)
12337     return;
12338   NontemporalDeclsSet &DS =
12339       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12340   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12341     for (const Stmt *Ref : C->private_refs()) {
12342       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12343       const ValueDecl *VD;
12344       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12345         VD = DRE->getDecl();
12346       } else {
12347         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12348         assert((ME->isImplicitCXXThis() ||
12349                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12350                "Expected member of current class.");
12351         VD = ME->getMemberDecl();
12352       }
12353       DS.insert(VD);
12354     }
12355   }
12356 }
12357 
12358 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12359   if (!NeedToPush)
12360     return;
12361   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12362 }
12363 
12364 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12365     CodeGenFunction &CGF,
12366     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12367                           std::pair<Address, Address>> &LocalVars)
12368     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12369   if (!NeedToPush)
12370     return;
12371   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12372       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12373   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12374 }
12375 
12376 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12377   if (!NeedToPush)
12378     return;
12379   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12380 }
12381 
12382 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12383   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12384 
12385   return llvm::any_of(
12386       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12387       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12388 }
12389 
12390 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12391     const OMPExecutableDirective &S,
12392     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12393     const {
12394   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12395   // Vars in target/task regions must be excluded completely.
12396   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12397       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12398     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12399     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12400     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12401     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12402       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12403         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12404     }
12405   }
12406   // Exclude vars in private clauses.
12407   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12408     for (const Expr *Ref : C->varlists()) {
12409       if (!Ref->getType()->isScalarType())
12410         continue;
12411       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12412       if (!DRE)
12413         continue;
12414       NeedToCheckForLPCs.insert(DRE->getDecl());
12415     }
12416   }
12417   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12418     for (const Expr *Ref : C->varlists()) {
12419       if (!Ref->getType()->isScalarType())
12420         continue;
12421       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12422       if (!DRE)
12423         continue;
12424       NeedToCheckForLPCs.insert(DRE->getDecl());
12425     }
12426   }
12427   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12428     for (const Expr *Ref : C->varlists()) {
12429       if (!Ref->getType()->isScalarType())
12430         continue;
12431       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12432       if (!DRE)
12433         continue;
12434       NeedToCheckForLPCs.insert(DRE->getDecl());
12435     }
12436   }
12437   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12438     for (const Expr *Ref : C->varlists()) {
12439       if (!Ref->getType()->isScalarType())
12440         continue;
12441       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12442       if (!DRE)
12443         continue;
12444       NeedToCheckForLPCs.insert(DRE->getDecl());
12445     }
12446   }
12447   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12448     for (const Expr *Ref : C->varlists()) {
12449       if (!Ref->getType()->isScalarType())
12450         continue;
12451       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12452       if (!DRE)
12453         continue;
12454       NeedToCheckForLPCs.insert(DRE->getDecl());
12455     }
12456   }
12457   for (const Decl *VD : NeedToCheckForLPCs) {
12458     for (const LastprivateConditionalData &Data :
12459          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12460       if (Data.DeclToUniqueName.count(VD) > 0) {
12461         if (!Data.Disabled)
12462           NeedToAddForLPCsAsDisabled.insert(VD);
12463         break;
12464       }
12465     }
12466   }
12467 }
12468 
/// Push a lastprivate-conditional tracking region for directive \p S with
/// loop iteration variable \p IVLVal. Only pushes when OpenMP >= 5.0 and the
/// directive has at least one 'lastprivate(conditional:...)' clause.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional-lastprivate variable to a unique name used for
    // the internal "last value" globals ("pl_cond" prefix).
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // The IV lvalue is needed later to decide which store is the "last" one.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12500 
12501 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12502     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12503     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12504   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12505   if (CGM.getLangOpts().OpenMP < 50)
12506     return;
12507   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12508   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12509   if (!NeedToAddForLPCsAsDisabled.empty()) {
12510     Action = ActionToDo::DisableLastprivateConditional;
12511     LastprivateConditionalData &Data =
12512         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12513     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12514       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12515     Data.Fn = CGF.CurFn;
12516     Data.Disabled = true;
12517   }
12518 }
12519 
/// Factory for the "disable inner analysis" flavor: simply forwards to the
/// two-argument constructor, which decides whether a push is needed.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12525 
12526 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12527   if (CGM.getLangOpts().OpenMP < 50)
12528     return;
12529   if (Action == ActionToDo::DisableLastprivateConditional) {
12530     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12531            "Expected list of disabled private vars.");
12532     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12533   }
12534   if (Action == ActionToDo::PushAsLastprivateConditional) {
12535     assert(
12536         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12537         "Expected list of lastprivate conditional vars.");
12538     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12539   }
12540 }
12541 
/// Allocate (or reuse) the private copy for a lastprivate-conditional
/// variable \p VD: a local record { value, char fired } is created per
/// (function, variable), the 'fired' flag is reset to 0, and the address of
/// the value field is returned.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of the generated record/fields/base lvalue.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the record and the local temporary.
    // (Record name keeps its historical spelling "lasprivate.conditional".)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // fired = 0; the flag is set elsewhere when the variable is written.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12576 
12577 namespace {
12578 /// Checks if the lastprivate conditional variable is referenced in LHS.
12579 class LastprivateConditionalRefChecker final
12580     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12581   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12582   const Expr *FoundE = nullptr;
12583   const Decl *FoundD = nullptr;
12584   StringRef UniqueDeclName;
12585   LValue IVLVal;
12586   llvm::Function *FoundFn = nullptr;
12587   SourceLocation Loc;
12588 
12589 public:
12590   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12591     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12592          llvm::reverse(LPM)) {
12593       auto It = D.DeclToUniqueName.find(E->getDecl());
12594       if (It == D.DeclToUniqueName.end())
12595         continue;
12596       if (D.Disabled)
12597         return false;
12598       FoundE = E;
12599       FoundD = E->getDecl()->getCanonicalDecl();
12600       UniqueDeclName = It->second;
12601       IVLVal = D.IVLVal;
12602       FoundFn = D.Fn;
12603       break;
12604     }
12605     return FoundE == E;
12606   }
12607   bool VisitMemberExpr(const MemberExpr *E) {
12608     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12609       return false;
12610     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12611          llvm::reverse(LPM)) {
12612       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12613       if (It == D.DeclToUniqueName.end())
12614         continue;
12615       if (D.Disabled)
12616         return false;
12617       FoundE = E;
12618       FoundD = E->getMemberDecl()->getCanonicalDecl();
12619       UniqueDeclName = It->second;
12620       IVLVal = D.IVLVal;
12621       FoundFn = D.Fn;
12622       break;
12623     }
12624     return FoundE == E;
12625   }
12626   bool VisitStmt(const Stmt *S) {
12627     for (const Stmt *Child : S->children()) {
12628       if (!Child)
12629         continue;
12630       if (const auto *E = dyn_cast<Expr>(Child))
12631         if (!E->isGLValue())
12632           continue;
12633       if (Visit(Child))
12634         return true;
12635     }
12636     return false;
12637   }
12638   explicit LastprivateConditionalRefChecker(
12639       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12640       : LPM(LPM) {}
12641   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12642   getFoundData() const {
12643     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12644   }
12645 };
12646 } // namespace
12647 
/// Emit the "maybe update the global last value" sequence for a
/// lastprivate-conditional variable: inside a critical region (or directly
/// in simd-only mode), compare the stored iteration counter with the current
/// one and, if not newer, copy the private value into the shared global
/// named \p UniqueDeclName.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick signed or unsigned comparison to match the IV's type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12733 
// Checks whether the expression being assigned (LHS) refers to a variable
// registered as lastprivate(conditional:) and, if so, emits the bookkeeping
// required to track its "last" value: either an atomic Fired-flag store (when
// written from an inner parallel region) or a direct conditional update.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // Lastprivate conditional is supported since OpenMP 5.0 only.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Scan LHS for a reference to one of the registered lastprivate conditional
  // variables in the active regions; bail out if there is none.
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy as the wrapper struct that carries the
    // Fired flag alongside the value.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomically mark the variable as updated; the owning (outer) function
    // later checks this flag and performs the actual conditional update.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12776 
12777 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12778     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12779     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12780   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12781     return;
12782   auto Range = llvm::reverse(LastprivateConditionalStack);
12783   auto It = llvm::find_if(
12784       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12785   if (It == Range.end() || It->Fn != CGF.CurFn)
12786     return;
12787   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12788   assert(LPCI != LastprivateConditionalToTypes.end() &&
12789          "Lastprivates must be registered already.");
12790   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12791   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12792   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12793   for (const auto &Pair : It->DeclToUniqueName) {
12794     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12795     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12796       continue;
12797     auto I = LPCI->getSecond().find(Pair.first);
12798     assert(I != LPCI->getSecond().end() &&
12799            "Lastprivate must be rehistered already.");
12800     // bool Cmp = priv_a.Fired != 0;
12801     LValue BaseLVal = std::get<3>(I->getSecond());
12802     LValue FiredLVal =
12803         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12804     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12805     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12806     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12807     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12808     // if (Cmp) {
12809     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12810     CGF.EmitBlock(ThenBB);
12811     Address Addr = CGF.GetAddrOfLocalVar(VD);
12812     LValue LVal;
12813     if (VD->getType()->isReferenceType())
12814       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12815                                            AlignmentSource::Decl);
12816     else
12817       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12818                                 AlignmentSource::Decl);
12819     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12820                                      D.getBeginLoc());
12821     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12822     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12823     // }
12824   }
12825 }
12826 
12827 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12828     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12829     SourceLocation Loc) {
12830   if (CGF.getLangOpts().OpenMP < 50)
12831     return;
12832   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12833   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12834          "Unknown lastprivate conditional variable.");
12835   StringRef UniqueName = It->second;
12836   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12837   // The variable was not updated in the region - exit.
12838   if (!GV)
12839     return;
12840   LValue LPLVal = CGF.MakeAddrLValue(
12841       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12842       PrivLVal.getType().getNonReferenceType());
12843   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12844   CGF.EmitStoreOfScalar(Res, PrivLVal);
12845 }
12846 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: runtime support for -fopenmp-simd (SIMD-only) mode.
// In this mode only 'simd' constructs are code-generated and no calls into
// the OpenMP runtime library may be emitted, so every entry point below that
// would require runtime support traps via llvm_unreachable - Sema is expected
// to have rejected or ignored the corresponding constructs already.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12929 
// SIMD-only (-fopenmp-simd) mode: worksharing-loop scheduling, clause
// handling, threadprivate storage and flushes all require the OpenMP runtime
// library, which must not be referenced in this mode - trap if reached.

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13006 
// SIMD-only (-fopenmp-simd) mode: tasking, task reductions and cancellation
// require the OpenMP runtime library - trap if reached. The only exception is
// emitReduction below, whose "simple" form needs no runtime calls.

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Simple reductions (applied directly, without runtime synchronization) are
// the only form reachable in SIMD-only mode; delegate to the base
// implementation for the actual codegen.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13075 
// SIMD-only (-fopenmp-simd) mode: offloading (target), teams, target data and
// doacross support require the OpenMP runtime library - trap if reached.
// emitTargetGlobal is the one exception: it reports "not handled" so that
// normal (host) codegen for the global proceeds.

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// No offloading in SIMD-only mode: never claim a global, let regular codegen
// handle it.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13155