1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include "llvm/Support/Format.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <numeric>
43 
44 using namespace clang;
45 using namespace CodeGen;
46 using namespace llvm::omp;
47 
48 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Create region info backed by a captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Create region info without a captured statement (used for regions that
  /// reuse the enclosing function's capture state).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point for untied tasks; no-op by default,
  /// overridden by task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this OpenMP region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may be exited via a 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the code for the body of the region.
  RegionCodeGenTy CodeGen;
  /// Directive kind this region corresponds to.
  OpenMPDirectiveKind Kind;
  /// Whether a 'cancel' construct may appear inside the region.
  bool HasCancel;
};
110 
/// API for captured statement code generation in OpenMP constructs.
/// Region whose body is outlined into a helper function for 'parallel'-style
/// directives.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};
143 
/// API for captured statement code generation in OpenMP constructs.
/// Region whose body is outlined for 'task'-style directives.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action implementing the part-switching scheme needed for 'untied'
  /// tasks: the body is emitted as a sequence of parts, and a switch over the
  /// stored part id resumes execution at the correct part on re-entry.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: the constructor takes 'Tied').
    bool Untied;
    /// Variable (received by pointer) holding the task's current part id.
    const VarDecl *PartIDVar;
    /// Extra codegen executed at every switching point, after the next part
    /// id has been stored.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch on the part id; one case is appended per emitted part.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Unmatched part ids fall through to the 'done' block, which just
        // leaves the function.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one switching point: store the next part id (the current case
    /// count), run the extra untied codegen, exit the function, and register
    /// the continuation block as a new case of the switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts the task body has been split into so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
232 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing region's info.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info of the enclosing region; it is
  /// restored by InlinedOpenMPRegionRAII when the inlined region ends.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE(review): unlike the other accessors this consults OldCSI rather
    // than OuterRegionInfo, so a non-OpenMP enclosing captured region can
    // still supply a helper name -- confirm intent before "simplifying".
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Return the captured-statement info this region was layered on top of.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if the enclosing
  /// region is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
315 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name for the target region helper.
  StringRef HelperName;
};
344 
/// Placeholder region codegen callback; must never actually be invoked,
/// because expression regions (CGOpenMPInnerExprInfo) have no body to emit.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need to be
      // privatized.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
407 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CGF;
  /// Saved lambda capture map, stashed while the region is emitted when
  /// NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field (same stash/restore discipline).
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info (same stash/restore discipline).
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// If true, the inlined region must not see the enclosing function's
  /// lambda/block capture state; it is cleared in the constructor and
  /// restored in the destructor.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the lambda/block capture state and clear it on CGF.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Restore the stashed lambda/block capture state.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
450 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (shares the 0x40 bit with
  /// OMP_IDENT_BARRIER_IMPL, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
479 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs with special meaning to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
505 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices into the ident_t structure above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
546 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Schedule used when no schedule clause is specified.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
578 
579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580 /// region.
581 class CleanupTy final : public EHScopeStack::Cleanup {
582   PrePostActionTy *Action;
583 
584 public:
585   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587     if (!CGF.HaveInsertPoint())
588       return;
589     Action->Exit(CGF);
590   }
591 };
592 
593 } // anonymous namespace
594 
595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596   CodeGenFunction::RunCleanupsScope Scope(CGF);
597   if (PrePostAction) {
598     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599     Callback(CodeGen, CGF, *PrePostAction);
600   } else {
601     PrePostActionTy Action;
602     Callback(CodeGen, CGF, Action);
603   }
604 }
605 
606 /// Check if the combiner is a call to UDR combiner and if it is so return the
607 /// UDR decl used for reduction.
608 static const OMPDeclareReductionDecl *
609 getReductionInit(const Expr *ReductionOp) {
610   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612       if (const auto *DRE =
613               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615           return DRD;
616   return nullptr;
617 }
618 
/// Initialize the private copy of a reduction item.
/// If \p DRD has an 'initializer' clause, emit the initializer call \p InitOp
/// with its two arguments remapped to \p Private and \p Original; otherwise
/// copy a zero-initialized constant of type \p Ty into \p Private.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The call's two arguments (with a unary operator stripped) reference the
    // private and original variables of the UDR initializer.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap those variables to the actual private/original addresses.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the second of the generated UDR functions and
    // emit the (value-discarded) initializer call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a private zero constant and copy it.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied from the global lvalue directly into Private.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex path: store the loaded rvalue into Private.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
674 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// the user-defined reduction initializer; otherwise \p Init is emitted
/// directly into each element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration (may be null); when present, a
/// source element pointer is threaded through the loop alongside the
/// destination pointer.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): bitcasting DestAddr to its own element type looks like a
  // no-op; presumably kept for pointer-type bookkeeping -- confirm before
  // removing.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current destination (and, for UDRs, source) element
  // pointer across loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR name "omp.arraycpy.dest.element" is reused for the
    // *source* pointer here; IR value names are cosmetic, so this is harmless.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
766 
767 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
768   return CGF.EmitOMPSharedLValue(E);
769 }
770 
771 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
772                                             const Expr *E) {
773   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
774     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
775   return LValue();
776 }
777 
/// Emit initialization of an aggregate (array) private reduction copy at
/// \p PrivateAddr, element by element, from either the declare-reduction
/// initializer or the private variable's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Prefer the declare-reduction initializer when the DRD provides one, or
  // when the private copy has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
794 
795 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
796                                    ArrayRef<const Expr *> Origs,
797                                    ArrayRef<const Expr *> Privates,
798                                    ArrayRef<const Expr *> ReductionOps) {
799   ClausesData.reserve(Shareds.size());
800   SharedAddresses.reserve(Shareds.size());
801   Sizes.reserve(Shareds.size());
802   BaseDecls.reserve(Shareds.size());
803   const auto *IOrig = Origs.begin();
804   const auto *IPriv = Privates.begin();
805   const auto *IRed = ReductionOps.begin();
806   for (const Expr *Ref : Shareds) {
807     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
808     std::advance(IOrig, 1);
809     std::advance(IPriv, 1);
810     std::advance(IRed, 1);
811   }
812 }
813 
814 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
815   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
816          "Number of generated lvalues must be exactly N.");
817   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
818   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
819   SharedAddresses.emplace_back(First, Second);
820   if (ClausesData[N].Shared == ClausesData[N].Ref) {
821     OrigAddresses.emplace_back(First, Second);
822   } else {
823     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
824     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
825     OrigAddresses.emplace_back(First, Second);
826   }
827 }
828 
/// Compute and record the size (in bytes, and in elements for variably
/// modified types) of reduction item \p N, then emit the variably modified
/// private type with that size bound.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size type: record the static byte size; the element-count
    // member stays null for non-VLA items.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (UB - LB) + 1; bytes = count * sizeof.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: byte size comes from the original type; derive the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so that
  // emitting the variably modified type below picks it up.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
865 
/// Emit the variably modified private type of item \p N with an externally
/// provided element count \p Size (e.g. a size loaded from task data).
/// For non-variably-modified items, \p Size must be null and nothing is done.
void CGOpenMPRuntime::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA size expression to the supplied value while emitting the
  // type, mirroring the companion overload above.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
884 
/// Emit initialization of the private reduction copy for item \p N at
/// \p PrivateAddr. Dispatches between aggregate initialization, a
/// declare-reduction initializer, and the private variable's own default
/// initializer; \p DefaultInit is invoked first so defaults can be set up.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Reinterpret both addresses with the memory types of the private/shared
  // declarations before emitting any stores through them.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items are initialized element-wise.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a declare-reduction initializer (or no own init).
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
918 
919 bool ReductionCodeGen::needCleanups(unsigned N) {
920   const auto *PrivateVD =
921       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922   QualType PrivateType = PrivateVD->getType();
923   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
924   return DTorKind != QualType::DK_none;
925 }
926 
927 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
928                                     Address PrivateAddr) {
929   const auto *PrivateVD =
930       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
931   QualType PrivateType = PrivateVD->getType();
932   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
933   if (needCleanups(N)) {
934     PrivateAddr = CGF.Builder.CreateElementBitCast(
935         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
936     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
937   }
938 }
939 
/// Walk pointer/reference indirections of \p BaseTy starting from \p BaseLV,
/// loading through each level, until the type matches \p ElTy; then return an
/// lvalue for the final address reinterpreted with \p ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Dereference one level: pointers load directly; references need a
    // reference lvalue first.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast the element type while preserving the base/TBAA info of BaseLV.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
959 
/// Build, for each pointer/reference level of \p BaseTy down to \p ElTy, a
/// chain of stack temporaries where each temporary stores the address of the
/// next; store \p Addr (cast to the innermost type) at the bottom and return
/// the outermost temporary. With no indirection levels, simply return \p Addr
/// cast to \p BaseLVType with \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; link it into the previous one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;        // first (outermost) temporary in the chain
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();  // innermost temporary's element type
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
987 
988 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
989   const VarDecl *OrigVD = nullptr;
990   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
991     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
992     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
993       Base = TempOASE->getBase()->IgnoreParenImpCasts();
994     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
995       Base = TempASE->getBase()->IgnoreParenImpCasts();
996     DE = cast<DeclRefExpr>(Base);
997     OrigVD = cast<VarDecl>(DE->getDecl());
998   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
999     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1000     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1001       Base = TempASE->getBase()->IgnoreParenImpCasts();
1002     DE = cast<DeclRefExpr>(Base);
1003     OrigVD = cast<VarDecl>(DE->getDecl());
1004   }
1005   return OrigVD;
1006 }
1007 
/// Adjust \p PrivateAddr for reduction item \p N when the reduction reference
/// is an array section/subscript: offset the private address by the distance
/// between the base lvalue and the shared address, then rebuild the original
/// indirection chain via castToBase. Otherwise return \p PrivateAddr as-is.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through pointer/reference levels to reach the element type.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Element distance between the base and the shared section start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain reference: record its decl and keep the private address unchanged.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1034 
1035 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1036   const OMPDeclareReductionDecl *DRD =
1037       getReductionInit(ClausesData[N].ReductionOp);
1038   return DRD && DRD->getInitializer();
1039 }
1040 
1041 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1042   return CGF.EmitLoadOfPointerLValue(
1043       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1044       getThreadIDVariable()->getType()->castAs<PointerType>());
1045 }
1046 
/// Emit the body of an OpenMP region inside a terminate scope: exceptions
/// escaping a structured block must terminate rather than propagate.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  // CodeGen is the stored region-body emission callback.
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1061 
1062 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1063     CodeGenFunction &CGF) {
1064   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1065                             getThreadIDVariable()->getType(),
1066                             AlignmentSource::Decl);
1067 }
1068 
1069 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1070                                        QualType FieldTy) {
1071   auto *Field = FieldDecl::Create(
1072       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1073       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1074       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1075   Field->setAccess(AS_public);
1076   DC->addDecl(Field);
1077   return Field;
1078 }
1079 
/// Construct the OpenMP runtime helper: set up the critical-name type, the
/// OpenMPIRBuilder, and load any offload metadata from host IR.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is an array of 8 i32 in the KMP runtime ABI.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1090 
1091 void CGOpenMPRuntime::clear() {
1092   InternalVars.clear();
1093   // Clean non-target variable declarations possibly used only in debug info.
1094   for (const auto &Data : EmittedNonTargetVariables) {
1095     if (!Data.getValue().pointsToAliveValue())
1096       continue;
1097     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1098     if (!GV)
1099       continue;
1100     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1101       continue;
1102     GV->eraseFromParent();
1103   }
1104 }
1105 
1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1107   SmallString<128> Buffer;
1108   llvm::raw_svector_ostream OS(Buffer);
1109   StringRef Sep = FirstSeparator;
1110   for (StringRef Part : Parts) {
1111     OS << Sep << Part;
1112     Sep = Separator;
1113   }
1114   return std::string(OS.str());
1115 }
1116 
/// Emit the outlined combiner or initializer function for a user-defined
/// reduction: `void .omp_combiner.(Ty *omp_out, Ty *omp_in)` (or the
/// `.omp_initializer.` counterpart). \p In / \p Out are the declared omp_in /
/// omp_out (or omp_orig / omp_priv) variables that get privatized onto the
/// two pointer parameters; \p CombinerInitializer is the expression to emit,
/// and may be null for an initializer that only runs omp_priv's own init.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Small helpers: force inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, first run omp_priv's own non-trivial initializer (if
  // any) before the explicit initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1173 
/// Emit (once per decl) the combiner and optional initializer functions for a
/// declare-reduction \p D, caching them in UDRMap. When emitted from within a
/// function (\p CGF non-null), the decl is also tracked per-function so the
/// cache entry can be invalidated with the function.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only direct-call initializers pass the init expression through;
    // otherwise omp_priv's own initializer is used inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1199 
1200 std::pair<llvm::Function *, llvm::Function *>
1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1202   auto I = UDRMap.find(D);
1203   if (I != UDRMap.end())
1204     return I->second;
1205   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1206   return UDRMap.lookup(D);
1207 }
1208 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Push a finalization callback for the region of kind \p Kind onto the
  /// OpenMPIRBuilder's finalization stack (no-op when \p OMPBuilder is null);
  /// the destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    // NOTE: FiniCB captures CGF by reference; this RAII object must not
    // outlive the CodeGenFunction (the destructor pops the callback).
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1253 
/// Outline the captured statement \p CS of a 'parallel'/'teams' (or combined)
/// directive \p D into a function taking the thread id pointer \p ThreadIDVar,
/// emitting the region body through \p CodeGen.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether any directive form that can carry 'cancel' does so;
  // this decides whether cancellation checks are emitted in the region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1290 
1291 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1292     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1294   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1295   return emitParallelOrTeamsOutlinedFunction(
1296       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1297 }
1298 
1299 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1300     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1301     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1302   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1303   return emitParallelOrTeamsOutlinedFunction(
1304       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1305 }
1306 
/// Outline a 'task'/'taskloop' region into a helper function. For untied
/// tasks, an action is attached that re-enqueues the task via
/// __kmpc_omp_task and reports the number of task parts in \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks: emit a call that re-schedules the task with the
  // runtime (kmp_task_t loaded from the TaskTVar parameter).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Collect 'cancel' presence from every directive form that supports it.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks are split into parts; report the count to the caller.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1353 
1354 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1355                              const RecordDecl *RD, const CGRecordLayout &RL,
1356                              ArrayRef<llvm::Constant *> Data) {
1357   llvm::StructType *StructTy = RL.getLLVMType();
1358   unsigned PrevIdx = 0;
1359   ConstantInitBuilder CIBuilder(CGM);
1360   auto DI = Data.begin();
1361   for (const FieldDecl *FD : RD->fields()) {
1362     unsigned Idx = RL.getLLVMFieldNo(FD);
1363     // Fill the alignment.
1364     for (unsigned I = PrevIdx; I < Idx; ++I)
1365       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1366     PrevIdx = Idx + 1;
1367     Fields.add(*DI);
1368     ++DI;
1369   }
1370 }
1371 
1372 template <class... As>
1373 static llvm::GlobalVariable *
1374 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1375                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1376                    As &&... Args) {
1377   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1378   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1379   ConstantInitBuilder CIBuilder(CGM);
1380   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1381   buildStructValue(Fields, CGM, RD, RL, Data);
1382   return Fields.finishAndCreateGlobal(
1383       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1384       std::forward<As>(Args)...);
1385 }
1386 
1387 template <typename T>
1388 static void
1389 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1390                                          ArrayRef<llvm::Constant *> Data,
1391                                          T &Parent) {
1392   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1393   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1394   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1395   buildStructValue(Fields, CGM, RD, RL, Data);
1396   Fields.finishAndAddTo(Parent);
1397 }
1398 
/// Create the per-function "service" insert point marker used to place the
/// ident_t/thread-id setup instructions: a dummy bitcast inserted either at
/// the builder's current point or right after the alloca insert point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // The marker is a no-op bitcast of undef; it only serves as a stable
  // instruction to insert before.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Place the marker in the entry block, just after the allocas.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1414 
1415 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1416   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1417   if (Elem.second.ServiceInsertPt) {
1418     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1419     Elem.second.ServiceInsertPt = nullptr;
1420     Ptr->eraseFromParent();
1421   }
1422 }
1423 
1424 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1425                                                   SourceLocation Loc,
1426                                                   SmallString<128> &Buffer) {
1427   llvm::raw_svector_ostream OS(Buffer);
1428   // Build debug location
1429   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1430   OS << ";" << PLoc.getFilename() << ";";
1431   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1432     OS << FD->getQualifiedNameAsString();
1433   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1434   return OS.str();
1435 }
1436 
1437 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1438                                                  SourceLocation Loc,
1439                                                  unsigned Flags) {
1440   llvm::Constant *SrcLocStr;
1441   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1442       Loc.isInvalid()) {
1443     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1444   } else {
1445     std::string FunctionName = "";
1446     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1447       FunctionName = FD->getQualifiedNameAsString();
1448     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1449     const char *FileName = PLoc.getFilename();
1450     unsigned Line = PLoc.getLine();
1451     unsigned Column = PLoc.getColumn();
1452     SrcLocStr =
1453         OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1454   }
1455   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1456   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1457                                      Reserved2Flags);
1458 }
1459 
/// Return the OpenMP thread id for the current function, reusing a cached
/// value, an outlined region's thread-id argument, or emitting a call to
/// __kmpc_global_thread_num at the function's service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the parameter when it is safe to do so: either no EH
      // landing pad is required, or the load happens in (or the pointer lives
      // in) the entry block / the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point so the cached value
  // is available throughout the function; restore the builder afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1527 
1528 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1529   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1530   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1531     clearLocThreadIdInsertPt(CGF);
1532     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1533   }
1534   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1535     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1536       UDRMap.erase(D);
1537     FunctionUDRMap.erase(CGF.CurFn);
1538   }
1539   auto I = FunctionUDMMap.find(CGF.CurFn);
1540   if (I != FunctionUDMMap.end()) {
1541     for(const auto *D : I->second)
1542       UDMMap.erase(D);
1543     FunctionUDMMap.erase(I);
1544   }
1545   LastprivateConditionalToTypes.erase(CGF.CurFn);
1546   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1547 }
1548 
/// Return the ident_t* type, as maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1552 
1553 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1554   if (!Kmpc_MicroTy) {
1555     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1556     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1557                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1558     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1559   }
1560   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1561 }
1562 
1563 llvm::FunctionCallee
1564 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1565                                              bool IsGPUDistribute) {
1566   assert((IVSize == 32 || IVSize == 64) &&
1567          "IV size is not compatible with the omp runtime");
1568   StringRef Name;
1569   if (IsGPUDistribute)
1570     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1571                                     : "__kmpc_distribute_static_init_4u")
1572                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1573                                     : "__kmpc_distribute_static_init_8u");
1574   else
1575     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1576                                     : "__kmpc_for_static_init_4u")
1577                         : (IVSigned ? "__kmpc_for_static_init_8"
1578                                     : "__kmpc_for_static_init_8u");
1579 
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582   llvm::Type *TypeParams[] = {
1583     getIdentTyPointerTy(),                     // loc
1584     CGM.Int32Ty,                               // tid
1585     CGM.Int32Ty,                               // schedtype
1586     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1587     PtrTy,                                     // p_lower
1588     PtrTy,                                     // p_upper
1589     PtrTy,                                     // p_stride
1590     ITy,                                       // incr
1591     ITy                                        // chunk
1592   };
1593   auto *FnTy =
1594       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1595   return CGM.CreateRuntimeFunction(FnTy, Name);
1596 }
1597 
1598 llvm::FunctionCallee
1599 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1600   assert((IVSize == 32 || IVSize == 64) &&
1601          "IV size is not compatible with the omp runtime");
1602   StringRef Name =
1603       IVSize == 32
1604           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1605           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1606   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1607   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1608                                CGM.Int32Ty,           // tid
1609                                CGM.Int32Ty,           // schedtype
1610                                ITy,                   // lower
1611                                ITy,                   // upper
1612                                ITy,                   // stride
1613                                ITy                    // chunk
1614   };
1615   auto *FnTy =
1616       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1617   return CGM.CreateRuntimeFunction(FnTy, Name);
1618 }
1619 
1620 llvm::FunctionCallee
1621 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1622   assert((IVSize == 32 || IVSize == 64) &&
1623          "IV size is not compatible with the omp runtime");
1624   StringRef Name =
1625       IVSize == 32
1626           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1627           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1628   llvm::Type *TypeParams[] = {
1629       getIdentTyPointerTy(), // loc
1630       CGM.Int32Ty,           // tid
1631   };
1632   auto *FnTy =
1633       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1634   return CGM.CreateRuntimeFunction(FnTy, Name);
1635 }
1636 
1637 llvm::FunctionCallee
1638 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1639   assert((IVSize == 32 || IVSize == 64) &&
1640          "IV size is not compatible with the omp runtime");
1641   StringRef Name =
1642       IVSize == 32
1643           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1644           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1645   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1646   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1647   llvm::Type *TypeParams[] = {
1648     getIdentTyPointerTy(),                     // loc
1649     CGM.Int32Ty,                               // tid
1650     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1651     PtrTy,                                     // p_lower
1652     PtrTy,                                     // p_upper
1653     PtrTy                                      // p_stride
1654   };
1655   auto *FnTy =
1656       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1657   return CGM.CreateRuntimeFunction(FnTy, Name);
1658 }
1659 
1660 /// Obtain information that uniquely identifies a target entry. This
1661 /// consists of the file and device IDs as well as line number associated with
1662 /// the relevant entry source location.
1663 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1664                                      unsigned &DeviceID, unsigned &FileID,
1665                                      unsigned &LineNum) {
1666   SourceManager &SM = C.getSourceManager();
1667 
1668   // The loc should be always valid and have a file ID (the user cannot use
1669   // #pragma directives in macros)
1670 
1671   assert(Loc.isValid() && "Source location is expected to be always valid.");
1672 
1673   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1674   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1675 
1676   llvm::sys::fs::UniqueID ID;
1677   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1678     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1679     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1680     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1681       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1682           << PLoc.getFilename() << EC.message();
1683   }
1684 
1685   DeviceID = ID.getDevice();
1686   FileID = ID.getFile();
1687   LineNum = PLoc.getLine();
1688 }
1689 
/// Return the address of the "_decl_tgt_ref_ptr" indirection pointer for a
/// declare-target variable, creating and registering it on first use.
/// Returns an invalid Address when no reference pointer is required.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no reference pointer is materialized.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'link' variables, and 'to' variables under required unified shared
  // memory, are accessed through the indirection pointer.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables get the file ID appended so the pointer
        // name is unique across translation units.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the weak pointer global and register the variable
      // with the offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host side statically initializes the pointer with the
      // variable's address.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1728 
1729 llvm::Constant *
1730 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1731   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1732          !CGM.getContext().getTargetInfo().isTLSSupported());
1733   // Lookup the entry, lazily creating it if necessary.
1734   std::string Suffix = getName({"cache", ""});
1735   return getOrCreateInternalVariable(
1736       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1737 }
1738 
1739 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1740                                                 const VarDecl *VD,
1741                                                 Address VDAddr,
1742                                                 SourceLocation Loc) {
1743   if (CGM.getLangOpts().OpenMPUseTLS &&
1744       CGM.getContext().getTargetInfo().isTLSSupported())
1745     return VDAddr;
1746 
1747   llvm::Type *VarTy = VDAddr.getElementType();
1748   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1749                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1750                                                        CGM.Int8PtrTy),
1751                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1752                          getOrCreateThreadPrivateCache(VD)};
1753   return Address(CGF.EmitRuntimeCall(
1754                      OMPBuilder.getOrCreateRuntimeFunction(
1755                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1756                      Args),
1757                  VDAddr.getAlignment());
1758 }
1759 
1760 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1761     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1762     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1763   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1764   // library.
1765   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1766   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1767                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1768                       OMPLoc);
1769   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1770   // to register constructor/destructor for variable.
1771   llvm::Value *Args[] = {
1772       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1773       Ctor, CopyCtor, Dtor};
1774   CGF.EmitRuntimeCall(
1775       OMPBuilder.getOrCreateRuntimeFunction(
1776           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1777       Args);
1778 }
1779 
/// Emit the constructor/destructor helper functions for a threadprivate
/// variable and register them with the runtime. Returns the standalone init
/// function when \p CGF is null (so it can be scheduled as a global
/// initializer), otherwise emits the registration inline into \p CGF and
/// returns nullptr. Also returns nullptr when TLS is used or nothing needs to
/// be emitted.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS handles everything; no runtime registration needed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit at most once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // void *__kmpc_global_ctor_(void *dst) — receives the address of the
      // per-thread copy and returns it.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer expression on the per-thread copy.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer that was passed in.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // void __kmpc_global_dtor_(void *dst) — destroys the per-thread copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor are passed to the runtime as typed null pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a standalone
      // __omp_threadprivate_init_ function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1899 
/// Emit and register the offload constructor/destructor entries for a
/// declare-target variable definition. Returns true when the variable's
/// normal host definition should be suppressed (i.e. when compiling for the
/// device), false otherwise.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no device compilation is involved at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables and 'to' variables under unified shared memory are
  // handled through the reference-pointer mechanism instead.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the ctor/dtor entries at most once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; it is only referenced via the offload entry.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, only a uniquely-named placeholder global is needed to
      // identify the entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; it is only referenced via the offload entry.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder global identifying the dtor entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2014 
/// Return the address of a per-thread copy of a compiler-generated
/// ("artificial") threadprivate variable named \p Name of type \p VarType,
/// creating the backing global on first use.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With TLS support, mark the global thread-local and return it directly —
  // no runtime call needed.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise obtain the per-thread copy via
  //   __kmpc_threadprivate_cached(loc, tid, &var, size, &cache)
  // and cast the returned i8* back to the variable's type.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2045 
2046 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2047                                    const RegionCodeGenTy &ThenGen,
2048                                    const RegionCodeGenTy &ElseGen) {
2049   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2050 
2051   // If the condition constant folds and can be elided, try to avoid emitting
2052   // the condition and the dead arm of the if/else.
2053   bool CondConstant;
2054   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2055     if (CondConstant)
2056       ThenGen(CGF);
2057     else
2058       ElseGen(CGF);
2059     return;
2060   }
2061 
2062   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2063   // emit the conditional branch.
2064   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2065   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2066   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2067   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2068 
2069   // Emit the 'then' code.
2070   CGF.EmitBlock(ThenBlock);
2071   ThenGen(CGF);
2072   CGF.EmitBranch(ContBlock);
2073   // Emit the 'else' code if present.
2074   // There is no need to emit line number for unconditional branch.
2075   (void)ApplyDebugLocation::CreateEmpty(CGF);
2076   CGF.EmitBlock(ElseBlock);
2077   ElseGen(CGF);
2078   // There is no need to emit line number for unconditional branch.
2079   (void)ApplyDebugLocation::CreateEmpty(CGF);
2080   CGF.EmitBranch(ContBlock);
2081   // Emit the continuation block for code after the if.
2082   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2083 }
2084 
// Emit code for an OpenMP 'parallel' region: either a real fork through
// __kmpc_fork_call, or — when the 'if' clause may be false — a serialized
// execution of the outlined function on the encountering thread, selected at
// runtime via emitIfClause.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined microtask and the captured variables to
  // the runtime, which forks the team.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function directly on this thread,
  // bracketed by __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // Without an 'if' clause the fork path is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2155 
2156 // If we're inside an (outlined) parallel region, use the region info's
2157 // thread-ID variable (it is passed in a first argument of the outlined function
2158 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2159 // regular serial code region, get thread ID by calling kmp_int32
2160 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2161 // return the address of that temp.
2162 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2163                                              SourceLocation Loc) {
2164   if (auto *OMPRegionInfo =
2165           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2166     if (OMPRegionInfo->getThreadIDVariable())
2167       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2168 
2169   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2170   QualType Int32Ty =
2171       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2172   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2173   CGF.EmitStoreOfScalar(ThreadID,
2174                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2175 
2176   return ThreadIDTemp;
2177 }
2178 
// Return (creating on first use) a module-level internal global with the
// given type, name, and address space. Results are cached in InternalVars;
// a repeated request with the same name must ask for the same type.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the Twine into a stable string usable as the map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // try_emplace either finds the cached entry or inserts a null placeholder.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Common linkage with a zero initializer lets multiple translation units
  // share one definition of the variable.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2198 
2199 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2200   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2201   std::string Name = getName({Prefix, "var"});
2202   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2203 }
2204 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Brackets a region with an "enter" runtime call and an "exit" runtime call.
/// When \p Conditional is set, the region body is emitted under an if-guard
/// on the enter call's result; the caller must invoke Done() afterwards to
/// close that guard.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Merge block for the conditional form; set by Enter(), consumed by Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  // Emit the enter call; in conditional mode, branch on its (non-null) result.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the guard opened by a conditional Enter(). Only meaningful after
  // Enter() ran with Conditional=true (ContBlock is set there).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  // Emit the exit call; runs as the region's post-action.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2243 
2244 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2245                                          StringRef CriticalName,
2246                                          const RegionCodeGenTy &CriticalOpGen,
2247                                          SourceLocation Loc, const Expr *Hint) {
2248   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2249   // CriticalOpGen();
2250   // __kmpc_end_critical(ident_t *, gtid, Lock);
2251   // Prepare arguments and build a call to __kmpc_critical
2252   if (!CGF.HaveInsertPoint())
2253     return;
2254   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2255                          getCriticalRegionLock(CriticalName)};
2256   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2257                                                 std::end(Args));
2258   if (Hint) {
2259     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2260         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2261   }
2262   CommonActionTy Action(
2263       OMPBuilder.getOrCreateRuntimeFunction(
2264           CGM.getModule(),
2265           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2266       EnterArgs,
2267       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2268                                             OMPRTL___kmpc_end_critical),
2269       Args);
2270   CriticalOpGen.setAction(Action);
2271   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2272 }
2273 
2274 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2275                                        const RegionCodeGenTy &MasterOpGen,
2276                                        SourceLocation Loc) {
2277   if (!CGF.HaveInsertPoint())
2278     return;
2279   // if(__kmpc_master(ident_t *, gtid)) {
2280   //   MasterOpGen();
2281   //   __kmpc_end_master(ident_t *, gtid);
2282   // }
2283   // Prepare arguments and build a call to __kmpc_master
2284   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2285   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2286                             CGM.getModule(), OMPRTL___kmpc_master),
2287                         Args,
2288                         OMPBuilder.getOrCreateRuntimeFunction(
2289                             CGM.getModule(), OMPRTL___kmpc_end_master),
2290                         Args,
2291                         /*Conditional=*/true);
2292   MasterOpGen.setAction(Action);
2293   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2294   Action.Done(CGF);
2295 }
2296 
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emits:
  //   if (__kmpc_masked(ident_t *, gtid, filter)) {
  //     MaskedOpGen();
  //     __kmpc_end_masked(ident_t *, gtid);
  //   }
  // Prepare arguments and build a call to __kmpc_masked.
  // Without a 'filter' clause the filter thread defaults to 0.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Close the conditional region opened by the enter call.
  Action.Done(CGF);
}
2325 
2326 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2327                                         SourceLocation Loc) {
2328   if (!CGF.HaveInsertPoint())
2329     return;
2330   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2331     OMPBuilder.createTaskyield(CGF.Builder);
2332   } else {
2333     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2334     llvm::Value *Args[] = {
2335         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2336         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2337     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2338                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2339                         Args);
2340   }
2341 
2342   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2343     Region->emitUntiedSwitch(CGF);
2344 }
2345 
// Emit a 'taskgroup' region bracketed by the runtime enter/exit calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2365 
2366 /// Given an array of pointers to variables, project the address of a
2367 /// given variable.
2368 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2369                                       unsigned Index, const VarDecl *Var) {
2370   // Pull out the pointer to the variable.
2371   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2372   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2373 
2374   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2375   Addr = CGF.Builder.CreateElementBitCast(
2376       Addr, CGF.ConvertTypeForMem(Var->getType()));
2377   return Addr;
2378 }
2379 
/// Emit the helper "void copy_func(void *LHSArg, void *RHSArg)" passed to
/// __kmpc_copyprivate. Both arguments are arrays of void* pointers to the
/// copyprivate variables; the body performs the user-visible assignment
/// (AssignmentOps[I]) from each RHS element into the matching LHS element.
/// NOTE(review): the call site in emitSingleRegion passes its (SrcExprs,
/// DstExprs) into the (DestExprs, SrcExprs) parameters here — the naming
/// along the chain looks swapped, though the pairs stay consistent with what
/// EmitOMPCopy consumes. Confirm against the copyprivate clause accessors
/// before renaming anything.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the helper is only referenced by the copyprivate call.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the raw void* parameters as pointers to the pointer arrays:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Emit one assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2433 
// Emit a 'single' region. The region body runs on exactly one thread; if the
// directive carries copyprivate variables, the executing thread broadcasts
// their values to the rest of the team through __kmpc_copyprivate, using a
// generated copy helper and the did_it flag to mark the executing thread.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Overall shape of the emitted code:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1; — still inside the guarded 'then' block, so only the
    // thread that executed the single region sets the flag.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2521 
2522 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2523                                         const RegionCodeGenTy &OrderedOpGen,
2524                                         SourceLocation Loc, bool IsThreads) {
2525   if (!CGF.HaveInsertPoint())
2526     return;
2527   // __kmpc_ordered(ident_t *, gtid);
2528   // OrderedOpGen();
2529   // __kmpc_end_ordered(ident_t *, gtid);
2530   // Prepare arguments and build a call to __kmpc_ordered
2531   if (IsThreads) {
2532     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2533     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2534                               CGM.getModule(), OMPRTL___kmpc_ordered),
2535                           Args,
2536                           OMPBuilder.getOrCreateRuntimeFunction(
2537                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2538                           Args);
2539     OrderedOpGen.setAction(Action);
2540     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2541     return;
2542   }
2543   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2544 }
2545 
2546 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2547   unsigned Flags;
2548   if (Kind == OMPD_for)
2549     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2550   else if (Kind == OMPD_sections)
2551     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2552   else if (Kind == OMPD_single)
2553     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2554   else if (Kind == OMPD_barrier)
2555     Flags = OMP_IDENT_BARRIER_EXPL;
2556   else
2557     Flags = OMP_IDENT_BARRIER_IMPL;
2558   return Flags;
2559 }
2560 
// Choose the default schedule/chunk for a worksharing loop. Only overrides
// the outputs for doacross loops (an 'ordered' clause with a parameter),
// which are forced to schedule(static, 1); otherwise both outputs are left
// untouched for the caller's defaults.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    // Synthesize an unsigned 32-bit literal '1' as the chunk expression.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
2578 
// Emit a barrier at the given location. Uses the OpenMPIRBuilder when
// enabled; otherwise emits __kmpc_barrier, or __kmpc_cancel_barrier inside a
// cancellable region (optionally followed by a check that exits the construct
// when cancellation was observed).
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The ident_t flags encode which construct implied this barrier.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        // A non-zero result means cancellation was requested somewhere.
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2628 
2629 /// Map the OpenMP loop schedule to the runtime enumeration.
2630 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2631                                           bool Chunked, bool Ordered) {
2632   switch (ScheduleKind) {
2633   case OMPC_SCHEDULE_static:
2634     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2635                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2636   case OMPC_SCHEDULE_dynamic:
2637     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2638   case OMPC_SCHEDULE_guided:
2639     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2640   case OMPC_SCHEDULE_runtime:
2641     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2642   case OMPC_SCHEDULE_auto:
2643     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2644   case OMPC_SCHEDULE_unknown:
2645     assert(!Chunked && "chunk was specified but schedule kind not known");
2646     return Ordered ? OMP_ord_static : OMP_sch_static;
2647   }
2648   llvm_unreachable("Unexpected runtime schedule");
2649 }
2650 
2651 /// Map the OpenMP distribute schedule to the runtime enumeration.
2652 static OpenMPSchedType
2653 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2654   // only static is allowed for dist_schedule
2655   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2656 }
2657 
2658 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2659                                          bool Chunked) const {
2660   OpenMPSchedType Schedule =
2661       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2662   return Schedule == OMP_sch_static;
2663 }
2664 
2665 bool CGOpenMPRuntime::isStaticNonchunked(
2666     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2667   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2668   return Schedule == OMP_dist_sch_static;
2669 }
2670 
2671 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2672                                       bool Chunked) const {
2673   OpenMPSchedType Schedule =
2674       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2675   return Schedule == OMP_sch_static_chunked;
2676 }
2677 
2678 bool CGOpenMPRuntime::isStaticChunked(
2679     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2680   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2681   return Schedule == OMP_dist_sch_static_chunked;
2682 }
2683 
2684 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2685   OpenMPSchedType Schedule =
2686       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2687   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2688   return Schedule != OMP_sch_static;
2689 }
2690 
2691 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2692                                   OpenMPScheduleClauseModifier M1,
2693                                   OpenMPScheduleClauseModifier M2) {
2694   int Modifier = 0;
2695   switch (M1) {
2696   case OMPC_SCHEDULE_MODIFIER_monotonic:
2697     Modifier = OMP_sch_modifier_monotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700     Modifier = OMP_sch_modifier_nonmonotonic;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_simd:
2703     if (Schedule == OMP_sch_static_chunked)
2704       Schedule = OMP_sch_static_balanced_chunked;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_last:
2707   case OMPC_SCHEDULE_MODIFIER_unknown:
2708     break;
2709   }
2710   switch (M2) {
2711   case OMPC_SCHEDULE_MODIFIER_monotonic:
2712     Modifier = OMP_sch_modifier_monotonic;
2713     break;
2714   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2715     Modifier = OMP_sch_modifier_nonmonotonic;
2716     break;
2717   case OMPC_SCHEDULE_MODIFIER_simd:
2718     if (Schedule == OMP_sch_static_chunked)
2719       Schedule = OMP_sch_static_balanced_chunked;
2720     break;
2721   case OMPC_SCHEDULE_MODIFIER_last:
2722   case OMPC_SCHEDULE_MODIFIER_unknown:
2723     break;
2724   }
2725   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2726   // If the static schedule kind is specified or if the ordered clause is
2727   // specified, and if the nonmonotonic modifier is not specified, the effect is
2728   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2729   // modifier is specified, the effect is as if the nonmonotonic modifier is
2730   // specified.
2731   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2732     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2733           Schedule == OMP_sch_static_balanced_chunked ||
2734           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2735           Schedule == OMP_dist_sch_static_chunked ||
2736           Schedule == OMP_dist_sch_static))
2737       Modifier = OMP_sch_modifier_nonmonotonic;
2738   }
2739   return Schedule | Modifier;
2740 }
2741 
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop. \p IVSize/\p IVSigned select the 4/8-[u] runtime
/// variant; \p DispatchValues carries the loop bounds and optional chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static (non-ordered) schedules must go through the static-init path, not
  // the dispatch runtime.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2774 
2775 static void emitForStaticInitCall(
2776     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2777     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2778     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2779     const CGOpenMPRuntime::StaticRTInput &Values) {
2780   if (!CGF.HaveInsertPoint())
2781     return;
2782 
2783   assert(!Values.Ordered);
2784   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2785          Schedule == OMP_sch_static_balanced_chunked ||
2786          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2787          Schedule == OMP_dist_sch_static ||
2788          Schedule == OMP_dist_sch_static_chunked);
2789 
2790   // Call __kmpc_for_static_init(
2791   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2792   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2793   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2794   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2795   llvm::Value *Chunk = Values.Chunk;
2796   if (Chunk == nullptr) {
2797     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2798             Schedule == OMP_dist_sch_static) &&
2799            "expected static non-chunked schedule");
2800     // If the Chunk was not specified in the clause - use default value 1.
2801     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2802   } else {
2803     assert((Schedule == OMP_sch_static_chunked ||
2804             Schedule == OMP_sch_static_balanced_chunked ||
2805             Schedule == OMP_ord_static_chunked ||
2806             Schedule == OMP_dist_sch_static_chunked) &&
2807            "expected static chunked schedule");
2808   }
2809   llvm::Value *Args[] = {
2810       UpdateLocation,
2811       ThreadId,
2812       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2813                                                   M2)), // Schedule type
2814       Values.IL.getPointer(),                           // &isLastIter
2815       Values.LB.getPointer(),                           // &LB
2816       Values.UB.getPointer(),                           // &UB
2817       Values.ST.getPointer(),                           // &Stride
2818       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2819       Chunk                                             // Chunk
2820   };
2821   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2822 }
2823 
2824 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2825                                         SourceLocation Loc,
2826                                         OpenMPDirectiveKind DKind,
2827                                         const OpenMPScheduleTy &ScheduleKind,
2828                                         const StaticRTInput &Values) {
2829   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2830       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2831   assert(isOpenMPWorksharingDirective(DKind) &&
2832          "Expected loop-based or sections-based directive.");
2833   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2834                                              isOpenMPLoopDirective(DKind)
2835                                                  ? OMP_IDENT_WORK_LOOP
2836                                                  : OMP_IDENT_WORK_SECTIONS);
2837   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2838   llvm::FunctionCallee StaticInitFunction =
2839       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2840   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2841   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2842                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2843 }
2844 
2845 void CGOpenMPRuntime::emitDistributeStaticInit(
2846     CodeGenFunction &CGF, SourceLocation Loc,
2847     OpenMPDistScheduleClauseKind SchedKind,
2848     const CGOpenMPRuntime::StaticRTInput &Values) {
2849   OpenMPSchedType ScheduleNum =
2850       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2851   llvm::Value *UpdatedLocation =
2852       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2853   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2854   llvm::FunctionCallee StaticInitFunction;
2855   bool isGPUDistribute =
2856       CGM.getLangOpts().OpenMPIsDevice &&
2857       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2858   StaticInitFunction = createForStaticInitFunction(
2859       Values.IVSize, Values.IVSigned, isGPUDistribute);
2860 
2861   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2862                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2863                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2864 }
2865 
2866 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2867                                           SourceLocation Loc,
2868                                           OpenMPDirectiveKind DKind) {
2869   if (!CGF.HaveInsertPoint())
2870     return;
2871   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2872   llvm::Value *Args[] = {
2873       emitUpdateLocation(CGF, Loc,
2874                          isOpenMPDistributeDirective(DKind)
2875                              ? OMP_IDENT_WORK_DISTRIBUTE
2876                              : isOpenMPLoopDirective(DKind)
2877                                    ? OMP_IDENT_WORK_LOOP
2878                                    : OMP_IDENT_WORK_SECTIONS),
2879       getThreadID(CGF, Loc)};
2880   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2881   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2882       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2883     CGF.EmitRuntimeCall(
2884         OMPBuilder.getOrCreateRuntimeFunction(
2885             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2886         Args);
2887   else
2888     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2889                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2890                         Args);
2891 }
2892 
2893 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2894                                                  SourceLocation Loc,
2895                                                  unsigned IVSize,
2896                                                  bool IVSigned) {
2897   if (!CGF.HaveInsertPoint())
2898     return;
2899   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2900   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2901   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2902 }
2903 
2904 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2905                                           SourceLocation Loc, unsigned IVSize,
2906                                           bool IVSigned, Address IL,
2907                                           Address LB, Address UB,
2908                                           Address ST) {
2909   // Call __kmpc_dispatch_next(
2910   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2911   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2912   //          kmp_int[32|64] *p_stride);
2913   llvm::Value *Args[] = {
2914       emitUpdateLocation(CGF, Loc),
2915       getThreadID(CGF, Loc),
2916       IL.getPointer(), // &isLastIter
2917       LB.getPointer(), // &Lower
2918       UB.getPointer(), // &Upper
2919       ST.getPointer()  // &Stride
2920   };
2921   llvm::Value *Call =
2922       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2923   return CGF.EmitScalarConversion(
2924       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2925       CGF.getContext().BoolTy, Loc);
2926 }
2927 
2928 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2929                                            llvm::Value *NumThreads,
2930                                            SourceLocation Loc) {
2931   if (!CGF.HaveInsertPoint())
2932     return;
2933   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2934   llvm::Value *Args[] = {
2935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2937   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2938                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2939                       Args);
2940 }
2941 
2942 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2943                                          ProcBindKind ProcBind,
2944                                          SourceLocation Loc) {
2945   if (!CGF.HaveInsertPoint())
2946     return;
2947   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2948   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2949   llvm::Value *Args[] = {
2950       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2951       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2952   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2953                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2954                       Args);
2955 }
2956 
2957 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2958                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2959   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2960     OMPBuilder.createFlush(CGF.Builder);
2961   } else {
2962     if (!CGF.HaveInsertPoint())
2963       return;
2964     // Build call void __kmpc_flush(ident_t *loc)
2965     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2966                             CGM.getModule(), OMPRTL___kmpc_flush),
2967                         emitUpdateLocation(CGF, Loc));
2968   }
2969 }
2970 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the order presumably mirrors the runtime's kmp_task_t
/// struct layout, so it must not be changed — confirm against the runtime.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2996 
2997 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2998   return OffloadEntriesTargetRegion.empty() &&
2999          OffloadEntriesDeviceGlobalVar.empty();
3000 }
3001 
/// Initialize target region entry.
/// Creates a placeholder entry (no address/ID yet) keyed by
/// DeviceID/FileID/ParentName/LineNum; registerTargetRegionEntryInfo() fills
/// in the address and ID later.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3015 
/// Register a target region entry: on the device, fill in the address/ID of
/// an entry previously created from host metadata; on the host, create a new
/// entry outright.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // An already-registered plain target-region entry is silently accepted;
    // any other duplicate is a programming error (assert below).
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3045 
3046 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3047     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3048     bool IgnoreAddressId) const {
3049   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3050   if (PerDevice == OffloadEntriesTargetRegion.end())
3051     return false;
3052   auto PerFile = PerDevice->second.find(FileID);
3053   if (PerFile == PerDevice->second.end())
3054     return false;
3055   auto PerParentName = PerFile->second.find(ParentName);
3056   if (PerParentName == PerFile->second.end())
3057     return false;
3058   auto PerLine = PerParentName->second.find(LineNum);
3059   if (PerLine == PerParentName->second.end())
3060     return false;
3061   // Fail if this entry is already registered.
3062   if (!IgnoreAddressId &&
3063       (PerLine->second.getAddress() || PerLine->second.getID()))
3064     return false;
3065   return true;
3066 }
3067 
3068 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3069     const OffloadTargetRegionEntryInfoActTy &Action) {
3070   // Scan all target region entries and perform the provided action.
3071   for (const auto &D : OffloadEntriesTargetRegion)
3072     for (const auto &F : D.second)
3073       for (const auto &P : F.second)
3074         for (const auto &L : P.second)
3075           Action(D.first, F.first, P.first(), L.first, L.second);
3076 }
3077 
/// Create a placeholder entry for a declare-target global variable; the
/// address/size/linkage are filled in later by
/// registerDeviceGlobalVarEntryInfo().
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3088 
3089 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3090     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3091                                      CharUnits VarSize,
3092                                      OMPTargetGlobalVarEntryKind Flags,
3093                                      llvm::GlobalValue::LinkageTypes Linkage) {
3094   if (CGM.getLangOpts().OpenMPIsDevice) {
3095     // This could happen if the device compilation is invoked standalone.
3096     if (!hasDeviceGlobalVarEntryInfo(VarName))
3097       return;
3098     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3099     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3100       if (Entry.getVarSize().isZero()) {
3101         Entry.setVarSize(VarSize);
3102         Entry.setLinkage(Linkage);
3103       }
3104       return;
3105     }
3106     Entry.setVarSize(VarSize);
3107     Entry.setLinkage(Linkage);
3108     Entry.setAddress(Addr);
3109   } else {
3110     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3111       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3112       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3113              "Entry not initialized!");
3114       if (Entry.getVarSize().isZero()) {
3115         Entry.setVarSize(VarSize);
3116         Entry.setLinkage(Linkage);
3117       }
3118       return;
3119     }
3120     OffloadEntriesDeviceGlobalVar.try_emplace(
3121         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3122     ++OffloadingEntriesNum;
3123   }
3124 }
3125 
3126 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3127     actOnDeviceGlobalVarEntriesInfo(
3128         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3129   // Scan all target region entries and perform the provided action.
3130   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3131     Action(E.getKey(), E.getValue());
3132 }
3133 
3134 void CGOpenMPRuntime::createOffloadEntry(
3135     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3136     llvm::GlobalValue::LinkageTypes Linkage) {
3137   StringRef Name = Addr->getName();
3138   llvm::Module &M = CGM.getModule();
3139   llvm::LLVMContext &C = M.getContext();
3140 
3141   // Create constant string with the name.
3142   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3143 
3144   std::string StringName = getName({"omp_offloading", "entry_name"});
3145   auto *Str = new llvm::GlobalVariable(
3146       M, StrPtrInit->getType(), /*isConstant=*/true,
3147       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3148   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3149 
3150   llvm::Constant *Data[] = {
3151       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3152       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3153       llvm::ConstantInt::get(CGM.SizeTy, Size),
3154       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3155       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3156   std::string EntryName = getName({"omp_offloading", "entry", ""});
3157   llvm::GlobalVariable *Entry = createGlobalStruct(
3158       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3159       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3160 
3161   // The entry has to be created in the section the linker expects it to be.
3162   Entry->setSection("omp_offloading_entries");
3163 }
3164 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected here indexed by their creation order so the output
  // metadata is deterministic; each tuple is (entry, location, name).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the entry's
        // device/file IDs against the files known to the SourceManager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now emit the actual __tgt_offload_entry globals, diagnosing entries that
  // were initialized but never completed with an address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Under unified shared memory on the device, 'to' variables need no
        // entry.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3338 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the integer/string operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout matches the
    // emitter in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3407 
3408 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3409   if (!KmpRoutineEntryPtrTy) {
3410     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3411     ASTContext &C = CGM.getContext();
3412     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3413     FunctionProtoType::ExtProtoInfo EPI;
3414     KmpRoutineEntryPtrQTy = C.getPointerType(
3415         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3416     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3417   }
3418 }
3419 
3420 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3421   // Make sure the type of the entry is already created. This is the type we
3422   // have to create:
3423   // struct __tgt_offload_entry{
3424   //   void      *addr;       // Pointer to the offload entry info.
3425   //                          // (function or global)
3426   //   char      *name;       // Name of the function or global.
3427   //   size_t     size;       // Size of the entry info (0 if it a function).
3428   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3429   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3430   // };
3431   if (TgtOffloadEntryQTy.isNull()) {
3432     ASTContext &C = CGM.getContext();
3433     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3434     RD->startDefinition();
3435     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3436     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3437     addFieldToRecordDecl(C, RD, C.getSizeType());
3438     addFieldToRecordDecl(
3439         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3440     addFieldToRecordDecl(
3441         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3442     RD->completeDefinition();
3443     RD->addAttr(PackedAttr::CreateImplicit(C));
3444     TgtOffloadEntryQTy = C.getRecordType(RD);
3445   }
3446   return TgtOffloadEntryQTy;
3447 }
3448 
3449 namespace {
3450 struct PrivateHelpersTy {
3451   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3452                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3453       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3454         PrivateElemInit(PrivateElemInit) {}
3455   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3456   const Expr *OriginalRef = nullptr;
3457   const VarDecl *Original = nullptr;
3458   const VarDecl *PrivateCopy = nullptr;
3459   const VarDecl *PrivateElemInit = nullptr;
3460   bool isLocalPrivate() const {
3461     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3462   }
3463 };
3464 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3465 } // anonymous namespace
3466 
3467 static bool isAllocatableDecl(const VarDecl *VD) {
3468   const VarDecl *CVD = VD->getCanonicalDecl();
3469   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3470     return false;
3471   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3472   // Use the default allocation.
3473   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3474             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3475            !AA->getAllocator());
3476 }
3477 
3478 static RecordDecl *
3479 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3480   if (!Privates.empty()) {
3481     ASTContext &C = CGM.getContext();
3482     // Build struct .kmp_privates_t. {
3483     //         /*  private vars  */
3484     //       };
3485     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3486     RD->startDefinition();
3487     for (const auto &Pair : Privates) {
3488       const VarDecl *VD = Pair.second.Original;
3489       QualType Type = VD->getType().getNonReferenceType();
3490       // If the private variable is a local variable with lvalue ref type,
3491       // allocate the pointer instead of the pointee type.
3492       if (Pair.second.isLocalPrivate()) {
3493         if (VD->getType()->isLValueReferenceType())
3494           Type = C.getPointerType(Type);
3495         if (isAllocatableDecl(VD))
3496           Type = C.getPointerType(Type);
3497       }
3498       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3499       if (VD->hasAttrs()) {
3500         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3501              E(VD->getAttrs().end());
3502              I != E; ++I)
3503           FD->addAttr(*I);
3504       }
3505     }
3506     RD->completeDefinition();
3507     return RD;
3508   }
3509   return nullptr;
3510 }
3511 
3512 static RecordDecl *
3513 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3514                          QualType KmpInt32Ty,
3515                          QualType KmpRoutineEntryPointerQTy) {
3516   ASTContext &C = CGM.getContext();
3517   // Build struct kmp_task_t {
3518   //         void *              shareds;
3519   //         kmp_routine_entry_t routine;
3520   //         kmp_int32           part_id;
3521   //         kmp_cmplrdata_t data1;
3522   //         kmp_cmplrdata_t data2;
3523   // For taskloops additional fields:
3524   //         kmp_uint64          lb;
3525   //         kmp_uint64          ub;
3526   //         kmp_int64           st;
3527   //         kmp_int32           liter;
3528   //         void *              reductions;
3529   //       };
3530   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3531   UD->startDefinition();
3532   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3533   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3534   UD->completeDefinition();
3535   QualType KmpCmplrdataTy = C.getRecordType(UD);
3536   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3537   RD->startDefinition();
3538   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3539   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3540   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3541   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3542   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3543   if (isOpenMPTaskLoopDirective(Kind)) {
3544     QualType KmpUInt64Ty =
3545         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3546     QualType KmpInt64Ty =
3547         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3548     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3549     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3550     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3551     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3552     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3553   }
3554   RD->completeDefinition();
3555   return RD;
3556 }
3557 
3558 static RecordDecl *
3559 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3560                                      ArrayRef<PrivateDataTy> Privates) {
3561   ASTContext &C = CGM.getContext();
3562   // Build struct kmp_task_t_with_privates {
3563   //         kmp_task_t task_data;
3564   //         .kmp_privates_t. privates;
3565   //       };
3566   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3567   RD->startDefinition();
3568   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3569   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3570     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3571   RD->completeDefinition();
3572   return RD;
3573 }
3574 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Entry signature: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the tt argument; Base is the embedded kmp_task_t record
  // (first field of the wrapper).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type TaskFunction expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates, or a null void* when the wrapper has no privates field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive lb, ub, st, liter and reductions
  // loaded from the task record.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry unconditionally returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3689 
/// Emit .omp_task_destructor.(kmp_int32 gtid, kmp_task_t_with_privates *tt),
/// which destroys every field of tt's privates record that requires
/// non-trivial destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task argument and step to the privates record (the
  // second field of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Register a destroy cleanup for every private copy whose type requires
  // non-trivial destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3738 
3739 /// Emit a privates mapping function for correct handling of private and
3740 /// firstprivate variables.
3741 /// \code
3742 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3743 /// **noalias priv1,...,  <tyn> **noalias privn) {
3744 ///   *priv1 = &.privates.priv1;
3745 ///   ...;
3746 ///   *privn = &.privates.privn;
3747 /// }
3748 /// \endcode
3749 static llvm::Value *
3750 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3751                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3752                                ArrayRef<PrivateDataTy> Privates) {
3753   ASTContext &C = CGM.getContext();
3754   FunctionArgList Args;
3755   ImplicitParamDecl TaskPrivatesArg(
3756       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3757       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3758       ImplicitParamDecl::Other);
3759   Args.push_back(&TaskPrivatesArg);
3760   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3761   unsigned Counter = 1;
3762   for (const Expr *E : Data.PrivateVars) {
3763     Args.push_back(ImplicitParamDecl::Create(
3764         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765         C.getPointerType(C.getPointerType(E->getType()))
3766             .withConst()
3767             .withRestrict(),
3768         ImplicitParamDecl::Other));
3769     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3770     PrivateVarsPos[VD] = Counter;
3771     ++Counter;
3772   }
3773   for (const Expr *E : Data.FirstprivateVars) {
3774     Args.push_back(ImplicitParamDecl::Create(
3775         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3776         C.getPointerType(C.getPointerType(E->getType()))
3777             .withConst()
3778             .withRestrict(),
3779         ImplicitParamDecl::Other));
3780     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3781     PrivateVarsPos[VD] = Counter;
3782     ++Counter;
3783   }
3784   for (const Expr *E : Data.LastprivateVars) {
3785     Args.push_back(ImplicitParamDecl::Create(
3786         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3787         C.getPointerType(C.getPointerType(E->getType()))
3788             .withConst()
3789             .withRestrict(),
3790         ImplicitParamDecl::Other));
3791     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3792     PrivateVarsPos[VD] = Counter;
3793     ++Counter;
3794   }
3795   for (const VarDecl *VD : Data.PrivateLocals) {
3796     QualType Ty = VD->getType().getNonReferenceType();
3797     if (VD->getType()->isLValueReferenceType())
3798       Ty = C.getPointerType(Ty);
3799     if (isAllocatableDecl(VD))
3800       Ty = C.getPointerType(Ty);
3801     Args.push_back(ImplicitParamDecl::Create(
3802         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3803         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3804         ImplicitParamDecl::Other));
3805     PrivateVarsPos[VD] = Counter;
3806     ++Counter;
3807   }
3808   const auto &TaskPrivatesMapFnInfo =
3809       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3810   llvm::FunctionType *TaskPrivatesMapTy =
3811       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3812   std::string Name =
3813       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3814   auto *TaskPrivatesMap = llvm::Function::Create(
3815       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3816       &CGM.getModule());
3817   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3818                                     TaskPrivatesMapFnInfo);
3819   if (CGM.getLangOpts().Optimize) {
3820     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3821     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3822     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3823   }
3824   CodeGenFunction CGF(CGM);
3825   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3826                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3827 
3828   // *privi = &.privates.privi;
3829   LValue Base = CGF.EmitLoadOfPointerLValue(
3830       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3831       TaskPrivatesArg.getType()->castAs<PointerType>());
3832   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3833   Counter = 0;
3834   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3835     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3836     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3837     LValue RefLVal =
3838         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3839     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3840         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3841     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3842     ++Counter;
3843   }
3844   CGF.FinishFunction();
3845   return TaskPrivatesMap;
3846 }
3847 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the (source) task's shareds block;
///        may be invalid when there is nothing to copy from.
/// \param TDBase Base lvalue of the kmp_task_t_with_privates record whose
///        privates are being initialized.
/// \param ForDup true when emitting the taskloop task_dup function body,
///        false when initializing a freshly allocated task.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // SrcBase views the shareds block as a SharedsTy record so its fields can
    // be read as initialization sources.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In dup mode (ForDup) only non-trivial constructor initializers are
    // emitted here.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate-style init: the copy is constructed from the original
        // (shared) value, located via SharedRefLValue below.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Duplication: read the source value from the source task's
          // shareds block (SrcBase is valid in this branch).
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures are emitted via the original reference
          // expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array element: privatize Elem to refer to the shared value,
          // then run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the copy's initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3969 
3970 /// Check if duplication function is required for taskloops.
3971 static bool checkInitIsRequired(CodeGenFunction &CGF,
3972                                 ArrayRef<PrivateDataTy> Privates) {
3973   bool InitRequired = false;
3974   for (const PrivateDataTy &Pair : Privates) {
3975     if (Pair.second.isLocalPrivate())
3976       continue;
3977     const VarDecl *VD = Pair.second.PrivateCopy;
3978     const Expr *Init = VD->getAnyInitializer();
3979     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3980                                     !CGF.isTrivialInitializer(Init));
3981     if (InitRequired)
3982       break;
3983   }
3984   return InitRequired;
3985 }
3986 
3987 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: (kmp_task_t_with_privates *dst, kmp_task_t_with_privates *src,
  //             int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the source task, so load its shareds
  // pointer.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4066 
4067 /// Checks if destructor function is required to be generated.
4068 /// \return true if cleanups are required, false otherwise.
4069 static bool
4070 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4071                          ArrayRef<PrivateDataTy> Privates) {
4072   for (const PrivateDataTy &P : Privates) {
4073     if (P.second.isLocalPrivate())
4074       continue;
4075     QualType Ty = P.second.Original->getType().getNonReferenceType();
4076     if (Ty.isDestructedType())
4077       return true;
4078   }
4079   return false;
4080 }
4081 
namespace {
/// Loop generator for OpenMP iterator expression.
/// The constructor privatizes the iterator variables and emits the loop
/// headers (counter init + condition + branch to body); the destructor emits
/// the matching loop latches (counter update + back-branch) and exit blocks,
/// so the code generated between construction and destruction runs once per
/// iteration of the (possibly nested) iterator space.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit destination per iterator, indexed in declaration order.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds first, then privatize the iterator and
    // counter variables before emitting any loop control flow.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed or unsigned compare to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the constructor's nesting.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4160 
4161 static std::pair<llvm::Value *, llvm::Value *>
4162 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4163   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4164   llvm::Value *Addr;
4165   if (OASE) {
4166     const Expr *Base = OASE->getBase();
4167     Addr = CGF.EmitScalarExpr(Base);
4168   } else {
4169     Addr = CGF.EmitLValue(E).getPointer(CGF);
4170   }
4171   llvm::Value *SizeVal;
4172   QualType Ty = E->getType();
4173   if (OASE) {
4174     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4175     for (const Expr *SE : OASE->getDimensions()) {
4176       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4177       Sz = CGF.EmitScalarConversion(
4178           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4179       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4180     }
4181   } else if (const auto *ASE =
4182                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4183     LValue UpAddrLVal =
4184         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4185     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4186     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4187         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4188     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4189     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4190     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4191   } else {
4192     SizeVal = CGF.getTypeSize(Ty);
4193   }
4194   return std::make_pair(Addr, SizeVal);
4195 }
4196 
4197 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4198 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4199   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4200   if (KmpTaskAffinityInfoTy.isNull()) {
4201     RecordDecl *KmpAffinityInfoRD =
4202         C.buildImplicitRecord("kmp_task_affinity_info_t");
4203     KmpAffinityInfoRD->startDefinition();
4204     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4206     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4207     KmpAffinityInfoRD->completeDefinition();
4208     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4209   }
4210 }
4211 
// Emits the task-creation sequence for a task/taskloop/target directive:
// collects and sorts the private variables, builds the specialized
// kmp_task_t-with-privates record, calls the runtime allocator, copies the
// shared data, initializes privates, and fills destructor/priority fields.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the per-element initializer decl.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable locals are kept as pointers, hence pointer alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by descending alignment to minimize padding in the privates record.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use an
  // extended record (with bounds/stride fields), cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the function mapping a private variable's index to its address in
  // the privates record (or a null pointer when there are no privates).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (from a final clause expression) or
  // a compile-time constant; select accordingly and OR in the static flags.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // With 'nowait', use __kmpc_omp_target_task_alloc, which additionally
    // takes the device ID.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator modifiers contribute a runtime-computed count (product of the
    // iterator space sizes); plain items contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: emit a variable-length array of affinity entries.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: a fixed-size temporary array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-driven entries need a runtime position counter, seeded with
    // the number of entries already emitted above.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // Cast the opaque kmp_task_t* returned by the runtime to the specialized
  // record type built for this task.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may need a duplication function to initialize privates of
    // the per-chunk task copies created by the runtime.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4602 
namespace {
/// Dependence kind for RTL.
/// NOTE: the values encode runtime-side dependence flags and must stay in
/// sync with the OpenMP runtime's kmp_depend_info flag encoding.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4613 
4614 /// Translates internal dependency kind into the runtime kind.
4615 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4616   RTLDependenceKindTy DepKind;
4617   switch (K) {
4618   case OMPC_DEPEND_in:
4619     DepKind = DepIn;
4620     break;
4621   // Out and InOut dependencies must use the same code.
4622   case OMPC_DEPEND_out:
4623   case OMPC_DEPEND_inout:
4624     DepKind = DepInOut;
4625     break;
4626   case OMPC_DEPEND_mutexinoutset:
4627     DepKind = DepMutexInOutSet;
4628     break;
4629   case OMPC_DEPEND_source:
4630   case OMPC_DEPEND_sink:
4631   case OMPC_DEPEND_depobj:
4632   case OMPC_DEPEND_unknown:
4633     llvm_unreachable("Unknown task dependence type");
4634   }
4635   return DepKind;
4636 }
4637 
4638 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4639 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4640                            QualType &FlagsTy) {
4641   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4642   if (KmpDependInfoTy.isNull()) {
4643     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4644     KmpDependInfoRD->startDefinition();
4645     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4646     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4647     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4648     KmpDependInfoRD->completeDefinition();
4649     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4650   }
4651 }
4652 
// Returns the number of dependence entries stored in a depobj and an lvalue
// for the first entry. The entry count is stashed by the runtime in the
// base_addr field of the element *preceding* the array (index -1).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* to the dependence array; load it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the hidden header entry.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4681 
/// Emits the kmp_depend_info entries for the regular (non-depobj)
/// dependencies in \p Data into \p DependenciesArray.
///
/// \param Pos Either a statically known index (unsigned *) when no iterator
/// modifier is present, or a runtime counter lvalue (LValue *) when the
/// number of entries is only known at run time. The position is advanced by
/// one for every emitted entry.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If an iterator modifier is present, wrap the emission below in the
  // generated iterator loops so each dependency is written once per
  // iteration.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static position: address the destination entry with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Dynamic position: load the current index from the counter lvalue.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position (statically or via the runtime counter) for the
    // next entry.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4741 
4742 static SmallVector<llvm::Value *, 4>
4743 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4744                         const OMPTaskDataTy::DependData &Data) {
4745   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4746          "Expected depobj dependecy kind.");
4747   SmallVector<llvm::Value *, 4> Sizes;
4748   SmallVector<LValue, 4> SizeLVals;
4749   ASTContext &C = CGF.getContext();
4750   QualType FlagsTy;
4751   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4752   RecordDecl *KmpDependInfoRD =
4753       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4754   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4755   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4756   {
4757     OMPIteratorGeneratorScope IteratorScope(
4758         CGF, cast_or_null<OMPIteratorExpr>(
4759                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4760                                    : nullptr));
4761     for (const Expr *E : Data.DepExprs) {
4762       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4763       LValue Base = CGF.EmitLoadOfPointerLValue(
4764           DepobjLVal.getAddress(CGF),
4765           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4766       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4767           Base.getAddress(CGF), KmpDependInfoPtrT);
4768       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4769                                 Base.getTBAAInfo());
4770       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4771           Addr.getElementType(), Addr.getPointer(),
4772           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4773       LValue NumDepsBase = CGF.MakeAddrLValue(
4774           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4775           Base.getBaseInfo(), Base.getTBAAInfo());
4776       // NumDeps = deps[i].base_addr;
4777       LValue BaseAddrLVal = CGF.EmitLValueForField(
4778           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4779       llvm::Value *NumDeps =
4780           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4781       LValue NumLVal = CGF.MakeAddrLValue(
4782           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4783           C.getUIntPtrType());
4784       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4785                               NumLVal.getAddress(CGF));
4786       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4787       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4788       CGF.EmitStoreOfScalar(Add, NumLVal);
4789       SizeLVals.push_back(NumLVal);
4790     }
4791   }
4792   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4793     llvm::Value *Size =
4794         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4795     Sizes.push_back(Size);
4796   }
4797   return Sizes;
4798 }
4799 
/// Copies the dependency records of every depobj in \p Data into
/// \p DependenciesArray, starting at the runtime offset read from
/// \p PosLVal. Each depobj contributes a memcpy of
/// (element count * sizeof(kmp_depend_info)) bytes, and the counter in
/// \p PosLVal is advanced by the number of copied elements.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // If an iterator modifier is present, the copies below are emitted
    // inside the generated iterator loops.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the void* depobj handle and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj (stored in
      // deps[-1].base_addr).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: size = NumDeps * sizeof(kmp_depend_info).
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4861 
/// Emits the combined kmp_depend_info array for a task's depend clauses and
/// returns {number of elements, array address cast to void*}. Regular
/// dependencies are written first at statically known offsets, then regular
/// dependencies with iterator modifiers (runtime offsets), and finally the
/// contents of depobj dependencies are copied in. Returns
/// {nullptr, Address::invalid()} when there are no dependencies at all.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Count only the dependencies whose number is a compile-time constant:
  // depobj clauses and iterator-modified clauses are sized at run time below.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Sum up the element counts stored in each depobj.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Each iterator contributes (upper bound * number of dep expressions)
      // entries.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at run time: emit a VLA on the stack.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // size expression of a variable-length array type.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Statically known size: use a plain constant-sized temporary.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First pass: regular dependencies without iterators, at static offsets.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4985 
/// Allocates and fills the kmp_depend_info array backing an 'omp depobj'
/// construct. The array is allocated with __kmpc_alloc and carries one extra
/// leading element whose base_addr field records the number of dependency
/// entries (needed by 'depobj(x) update(...)' and 'destroy'). Returns the
/// address of the first real entry (one past the counter element), cast to
/// void*.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator modifier the entry count is the product of all
    // iterator upper bounds, computed at run time.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the hidden counter element.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Statically known count: size the allocation as a constant array with
    // the extra counter element.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the entries starting at index 1 (index 0 holds the counter); with
  // an iterator modifier the index must be a runtime counter instead.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first dependency entry, past the counter.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5068 
5069 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5070                                         SourceLocation Loc) {
5071   ASTContext &C = CGM.getContext();
5072   QualType FlagsTy;
5073   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5074   LValue Base = CGF.EmitLoadOfPointerLValue(
5075       DepobjLVal.getAddress(CGF),
5076       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5077   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5078   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5079       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5080   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5081       Addr.getElementType(), Addr.getPointer(),
5082       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5083   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5084                                                                CGF.VoidPtrTy);
5085   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5086   // Use default allocator.
5087   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5088   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5089 
5090   // _kmpc_free(gtid, addr, nullptr);
5091   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5092                                 CGM.getModule(), OMPRTL___kmpc_free),
5093                             Args);
5094 }
5095 
/// Implements 'omp depobj(x) update(<kind>)': iterates over every
/// kmp_depend_info element stored in the depobj and rewrites its flags field
/// to the runtime encoding of \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Fetch the element count and the address of the first element.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: Begin on entry, ElementNext on the
  // back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5142 
/// Emits code for an explicit 'task' construct: allocates and initializes
/// the task via emitTaskInit, emits its dependency array (if any), and then
/// either enqueues the task (__kmpc_omp_task[_with_deps]) or, when the 'if'
/// clause evaluates to false, waits for the dependencies and runs the task
/// body immediately between __kmpc_omp_task_begin_if0/_complete_if0 calls.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch: the task is actually enqueued with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-branch ('if' clause is false): run the task body serially, framed by
  // begin_if0/complete_if0 so the runtime still observes the task.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: unconditionally take the then-branch.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5260 
/// Emits code for a 'taskloop' construct: allocates/initializes the task via
/// emitTaskInit, stores the loop bounds, stride and reduction data into the
/// kmp_task_t record, and issues the __kmpc_taskloop runtime call with the
/// 'if', grainsize/num_tasks schedule, and task-duplication arguments.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: evaluated 'if' clause condition, or 1 when absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task record.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the upper bound field of the task record.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the stride field of the task record.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No taskgroup reductions: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Runtime encoding of the 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5346 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Generates an explicit element-by-element loop (guarded so empty arrays
/// skip it) and invokes \p RedOpGen once per element with the LHS/RHS
/// variables remapped to the current element addresses.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Optional 'x' expression of an atomic update, forwarded
/// unchanged to \p RedOpGen.
/// \param EExpr Optional 'expr' expression of an atomic update, forwarded
/// unchanged to \p RedOpGen.
/// \param UpExpr Optional whole update expression of an atomic update,
/// forwarded unchanged to \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array has no elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // The current source/destination element pointers flow through PHI nodes
  // fed from the entry block here and from the loop latch below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation combines one element at a time.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5429 
5430 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5431 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5432 /// UDR combiner function.
5433 static void emitReductionCombiner(CodeGenFunction &CGF,
5434                                   const Expr *ReductionOp) {
5435   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5436     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5437       if (const auto *DRE =
5438               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5439         if (const auto *DRD =
5440                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5441           std::pair<llvm::Function *, llvm::Function *> Reduction =
5442               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5443           RValue Func = RValue::get(Reduction.first);
5444           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5445           CGF.EmitIgnoredExpr(ReductionOp);
5446           return;
5447         }
5448   CGF.EmitIgnoredExpr(ReductionOp);
5449 }
5450 
/// Emits the internal helper 'void reduction_func(void *LHSArg, void *RHSArg)'
/// passed to __kmpc_reduce{_nowait}. Both arguments are arrays of void*
/// pointing at the reduction items; the function combines each RHS item into
/// the corresponding LHS item using the matching entry of \p ReductionOps.
/// VLA reduction items get their sizes from extra slots in the arrays.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the address stored in the corresponding
  // slot of the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA items occupy an extra array slot holding the element count as a
      // pointer-sized integer.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for each reduction item using the remapped variables.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5542 
5543 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5544                                                   const Expr *ReductionOp,
5545                                                   const Expr *PrivateRef,
5546                                                   const DeclRefExpr *LHS,
5547                                                   const DeclRefExpr *RHS) {
5548   if (PrivateRef->getType()->isArrayType()) {
5549     // Emit reduction for array section.
5550     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5551     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5552     EmitOMPAggregateReduction(
5553         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5554         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5555           emitReductionCombiner(CGF, ReductionOp);
5556         });
5557   } else {
5558     // Emit reduction for array subscript or single variable.
5559     emitReductionCombiner(CGF, ReductionOp);
5560   }
5561 }
5562 
/// Emits the full reduction epilogue for an OpenMP region: builds the RedList
/// array of reduction-item addresses, the reduce_func helper, the
/// __kmpc_reduce{_nowait} call, and the switch over its result (case 1: tree
/// reduction, case 2: atomic reduction, default: nothing). When
/// Options.SimpleReduction is set, only the plain combiners are emitted.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: just emit each combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The VLA element count is smuggled through the extra slot as an
      // inttoptr'ed value; reduce_func reads it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update>' reduction ops so they can be emitted as a
      // simple atomic update; anything else falls back to a critical region.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Remap the LHS variable to a temporary holding the loaded
                // atomic value so UpExpr reads the snapshot, not memory.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5869 
5870 /// Generates unique name for artificial threadprivate variables.
5871 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5872 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5873                                       const Expr *Ref) {
5874   SmallString<256> Buffer;
5875   llvm::raw_svector_ostream Out(Buffer);
5876   const clang::DeclRefExpr *DE;
5877   const VarDecl *D = ::getBaseDecl(Ref, DE);
5878   if (!D)
5879     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5880   D = D->getCanonicalDecl();
5881   std::string Name = CGM.getOpenMPRuntime().getName(
5882       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5883   Out << Prefix << Name << "_"
5884       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5885   return std::string(Out.str());
5886 }
5887 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item the function is generated for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Otherwise a null 'orig' pointer is sufficient.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5956 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item the function is generated for.
/// \param ReductionOp Combiner expression to emit.
/// \param LHS Reference used as the in/out operand of the combiner.
/// \param RHS Reference used as the in operand of the combiner.
/// \param PrivateRef Private copy of the reduction item (selects array vs
/// scalar combining).
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6034 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param RCG Reduction item descriptor generator for the reduction clause.
/// \param N Index of the reduction item being finalized.
/// \returns nullptr when item \p N requires no cleanups; callers store a null
/// function pointer into the descriptor in that case.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor-like cleanups for this item => no finalizer is needed.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single implicit parameter: void* pointing at the private copy to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  // Internal linkage: the finalizer is only referenced through the descriptor
  // passed to the runtime, never by name from other TUs.
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = bitcast void* %arg to <type>*
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6083 
/// Emits the runtime initialization for task reductions: builds a stack array
/// of kmp_taskred_input_t descriptors (one per reduction item) and hands it to
/// __kmpc_taskred_init, or to __kmpc_taskred_modifier_init when the reduction
/// carries a task modifier. Returns the opaque reduction-data pointer produced
/// by the runtime, or nullptr when there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null (no cleanups needed) -- store a null pointer
    // so the runtime skips finalization for this item.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 tells the runtime this item uses delayed creation (VLA /
    // array-section sizes arrive via threadprivate globals); otherwise 0.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6212 
6213 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6214                                             SourceLocation Loc,
6215                                             bool IsWorksharingReduction) {
6216   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6217   // is_ws, int num, void *data);
6218   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6219   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6220                                                 CGM.IntTy, /*isSigned=*/true);
6221   llvm::Value *Args[] = {IdentTLoc, GTid,
6222                          llvm::ConstantInt::get(CGM.IntTy,
6223                                                 IsWorksharingReduction ? 1 : 0,
6224                                                 /*isSigned=*/true)};
6225   (void)CGF.EmitRuntimeCall(
6226       OMPBuilder.getOrCreateRuntimeFunction(
6227           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6228       Args);
6229 }
6230 
6231 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6232                                               SourceLocation Loc,
6233                                               ReductionCodeGen &RCG,
6234                                               unsigned N) {
6235   auto Sizes = RCG.getSizes(N);
6236   // Emit threadprivate global variable if the type is non-constant
6237   // (Sizes.second = nullptr).
6238   if (Sizes.second) {
6239     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6240                                                      /*isSigned=*/false);
6241     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6242         CGF, CGM.getContext().getSizeType(),
6243         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6244     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6245   }
6246 }
6247 
6248 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6249                                               SourceLocation Loc,
6250                                               llvm::Value *ReductionsPtr,
6251                                               LValue SharedLVal) {
6252   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6253   // *d);
6254   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6255                                                    CGM.IntTy,
6256                                                    /*isSigned=*/true),
6257                          ReductionsPtr,
6258                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6259                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6260   return Address(
6261       CGF.EmitRuntimeCall(
6262           OMPBuilder.getOrCreateRuntimeFunction(
6263               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6264           Args),
6265       SharedLVal.getAlignment());
6266 }
6267 
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Prefer the OpenMPIRBuilder path when it is enabled, but only for the
  // dependence-free form, which is all it supports for taskwait so far.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    // Materialize the dependence array (if any) for this taskwait.
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      // Args: ident_t*, gtid, ndeps, dep_list, ndeps_noalias,
      // noalias_dep_list. No noalias deps are emitted here, hence 0/null.
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Inside an OpenMP region, a taskwait is an untied-task scheduling point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6318 
6319 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6320                                            OpenMPDirectiveKind InnerKind,
6321                                            const RegionCodeGenTy &CodeGen,
6322                                            bool HasCancel) {
6323   if (!CGF.HaveInsertPoint())
6324     return;
6325   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6326                                  InnerKind != OMPD_critical &&
6327                                      InnerKind != OMPD_master &&
6328                                      InnerKind != OMPD_masked);
6329   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6330 }
6331 
namespace {
/// Cancellation kinds understood by the OpenMP runtime. The numeric values
/// are passed verbatim as the 'cncl_kind' argument of __kmpc_cancel and
/// __kmpc_cancellationpoint, so they must stay in sync with the runtime
/// library's constants.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6341 
6342 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6343   RTCancelKind CancelKind = CancelNoreq;
6344   if (CancelRegion == OMPD_parallel)
6345     CancelKind = CancelParallel;
6346   else if (CancelRegion == OMPD_for)
6347     CancelKind = CancelLoop;
6348   else if (CancelRegion == OMPD_sections)
6349     CancelKind = CancelSections;
6350   else {
6351     assert(CancelRegion == OMPD_taskgroup);
6352     CancelKind = CancelTaskgroup;
6353   }
6354   return CancelKind;
6355 }
6356 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      // Branch on the runtime's answer: nonzero means cancellation was
      // requested and we must leave the construct.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Parallel cancellation synchronizes the team with a cancel barrier
      // before exiting.
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6396 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual cancel is wrapped in a code-gen callback so it can be guarded
    // by the 'if' clause (emitted only on the then-branch) when one exists.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Parallel cancellation synchronizes the team with a cancel barrier
      // before exiting.
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an 'if' clause the cancel only fires when the condition is true;
      // the else-branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6442 
6443 namespace {
6444 /// Cleanup action for uses_allocators support.
6445 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6446   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6447 
6448 public:
6449   OMPUsesAllocatorsActionTy(
6450       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6451       : Allocators(Allocators) {}
6452   void Enter(CodeGenFunction &CGF) override {
6453     if (!CGF.HaveInsertPoint())
6454       return;
6455     for (const auto &AllocatorData : Allocators) {
6456       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6457           CGF, AllocatorData.first, AllocatorData.second);
6458     }
6459   }
6460   void Exit(CodeGenFunction &CGF) override {
6461     if (!CGF.HaveInsertPoint())
6462       return;
6463     for (const auto &AllocatorData : Allocators) {
6464       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6465                                                         AllocatorData.first);
6466     }
6467   }
6468 };
6469 } // namespace
6470 
6471 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6472     const OMPExecutableDirective &D, StringRef ParentName,
6473     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6474     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6475   assert(!ParentName.empty() && "Invalid target region parent name!");
6476   HasEmittedTargetRegion = true;
6477   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6478   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6479     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6480       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6481       if (!D.AllocatorTraits)
6482         continue;
6483       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6484     }
6485   }
6486   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6487   CodeGen.setAction(UsesAllocatorAction);
6488   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6489                                    IsOffloadEntry, CodeGen);
6490 }
6491 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = the constant array bound of the traits expression
  // (sema guarantees it has array type here).
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as 'void *' so it can be passed to
  // the runtime as an opaque pointer.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  // Call void *__kmpc_init_allocator(gtid, memspace, ntraits, traits).
  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable itself, then store the handle returned by the
  // runtime into it (converted from 'void *' to the variable's type).
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6526 
6527 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6528                                              const Expr *Allocator) {
6529   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6530   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6531   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6532   llvm::Value *AllocatorVal =
6533       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6534   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6535                                           CGF.getContext().VoidPtrTy,
6536                                           Allocator->getExprLoc());
6537   (void)CGF.EmitRuntimeCall(
6538       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6539                                             OMPRTL___kmpc_destroy_allocator),
6540       {ThreadId, AllocatorVal});
6541 }
6542 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target-region body into a function named EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the (bitcast) function address itself.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: emit a dummy one-byte global whose address serves as the
    // unique region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6627 
6628 /// Checks if the expression is constant or does not have non-trivial function
6629 /// calls.
6630 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6631   // We can skip constant expressions.
6632   // We can skip expressions with trivial calls or simple expressions.
6633   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6634           !E->hasNonTrivialCall(Ctx)) &&
6635          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6636 }
6637 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Peel off containers (e.g. compound statements with a single child) and
  // walk down nested compound statements looking for the one statement that
  // actually does something; return nullptr if there is more than one.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (no side effects, no non-trivial calls) do not
      // count as a child.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable when every declaration in it is one of the
        // harmless kinds below, or a variable that is global/unused.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the found child in case it is itself a container; the loop
    // re-runs if it unwraps to another CompoundStmt.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6679 
/// Returns the expression that determines the number of teams for the
/// target-based directive \p D, or nullptr when nothing needs to be
/// evaluated at runtime. \p DefaultVal receives a statically known value
/// when one can be deduced: a positive constant team count, 0 when the
/// count is left to the runtime, or -1 when no teams region needs to be
/// emitted at all.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', the team count depends on the single directive
    // (if any) nested directly inside the captured statement.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // A nested teams directive: honor its num_teams clause if present.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          // Record the constant value, if it folds, for attribute emission.
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: the runtime decides.
        DefaultVal = 0;
        return nullptr;
      }
      // Any other nested directive executes within a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined constructs carry the num_teams clause on \p D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // These execute with exactly one team.
    DefaultVal = 1;
    return nullptr;
  // The remaining kinds are not target execution directives; the assert
  // above rules them out, so they fall through to llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6808 
/// Emits host code computing the number of teams for the target directive
/// \p D. Returns nullptr when no teams region needs to be emitted; otherwise
/// an i32 value holding either the evaluated num_teams expression or the
/// statically known default from getNumTeamsExprForTargetDirective.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The expression comes from a nested teams directive, so it must be
      // emitted in the context of the captured statement.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // The num_teams clause is attached to \p D itself; emit it in the
      // current scope with its own cleanups.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // -1 signals that no teams region is required at all.
    return nullptr;
  }

  // Fall back to the statically known default team count.
  return Bld.getInt32(DefaultNT);
}
6850 
/// Computes a bound on the number of threads for the region captured by
/// \p CS by inspecting a directly nested parallel/simd directive and its
/// 'if'/'num_threads' clauses, clamped by \p DefaultThreadLimitVal (which
/// may be null). A result of i32 0 means "use the runtime default"; the
/// result is null only when \p DefaultThreadLimitVal is null and the nested
/// directive imposes no bound of its own.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' without a name modifier, or with the 'parallel'
        // modifier, applies to the nested parallel region.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // A constant-false condition serializes the region: one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the condition depends on.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations the num_threads expr depends on.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Take min(thread_limit, num_threads) when both are present.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region runs with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No nested directive: use the caller's limit, or 0 (runtime default).
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6942 
/// Returns the expression that bounds the number of threads for the
/// target-based directive \p D (a thread_limit or num_threads expression),
/// or nullptr when there is none. \p DefaultVal receives a statically known
/// bound when one can be deduced from constant clause arguments.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      // Record the constant limit, if it folds, for attribute emission.
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Both thread_limit and num_threads can bound the thread count; prefer
    // the smaller constant when both fold.
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): if no thread_limit clause was present, DefaultVal
          // still holds the caller's initial value (typically -1), so a
          // constant num_threads can never satisfy this '<' comparison and
          // is then dropped — confirm this is the intended behavior.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd regions run with a single thread.
    DefaultVal = 1;
    return nullptr;
  // The remaining kinds are not target execution directives; the assert
  // above rules them out, so they fall through to llvm_unreachable.
  // NOTE(review): unlike the sibling switches in this file, this list omits
  // OMPD_metadirective; it is caught by 'default' below either way.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7064 
/// Emits host code computing the number of threads for the target directive
/// \p D, combining the thread_limit, num_threads and 'if' clauses of \p D
/// and of directives nested inside it. The returned i32 value of 0 means
/// "use the runtime default".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the bound from the nested directive(s).
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested thread_limit clause must be emitted within the captured
      // statement's context.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the thread_limit expr depends on.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step into a teams (non-distribute) directive to inspect its body.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // Step into a non-simd distribute directive likewise.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd region runs with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested 'distribute' for its thread bound.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an 'if' without a name modifier, or with the 'parallel'
      // modifier, applies here.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false 'if' serializes the region: one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine as min(thread_limit, num_threads) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Apply the runtime 'if' condition last: false means one thread.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd regions run with a single thread.
    return Bld.getInt32(1);
  // The remaining kinds are not target execution directives; the assert
  // above rules them out, so they fall through to llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7283 
7284 namespace {
7285 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7286 
7287 // Utility to handle information from clauses associated with a given
7288 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7289 // It provides a convenient interface to obtain the information and generate
7290 // code for that information.
7291 class MappableExprsHandler {
7292 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these values appear to mirror the offloading runtime's
  /// map-type flags -- confirm against the libomptarget headers before
  /// changing any of them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region.  Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because they
    // are inherently structured.  It is not intended to be used on 'target
    // enter data' and 'target exit data' directives because they are inherently
    // dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7348 
7349   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7350   static unsigned getFlagMemberOffset() {
7351     unsigned Offset = 0;
7352     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7353          Remain = Remain >> 1)
7354       Offset++;
7355     return Offset;
7356   }
7357 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \p MapExpr may be omitted when the mapping has no associated source
    /// expression (e.g. an implicit map).
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    /// Returns the mapped declaration (may be null).
    const ValueDecl *getMapDecl() const { return MapDecl; }
    /// Returns the originating map-clause expression (may be null).
    const Expr *getMapExpr() const { return MapExpr; }
  };
7374 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereferences to the underlying base-pointer value.
    llvm::Value *operator*() const { return Ptr; }
    /// Returns the declaration associated with this device pointer, if any.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Associates a declaration with this device pointer.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7391 
  // Convenience aliases for the parallel per-entry arrays that are collected
  // for mappable clauses and later passed to the runtime library.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7399 
7400   /// This structure contains combined information generated for mappable
7401   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7402   /// mappers, and non-contiguous information.
7403   struct MapCombinedInfoTy {
7404     struct StructNonContiguousInfo {
7405       bool IsNonContiguous = false;
7406       MapDimArrayTy Dims;
7407       MapNonContiguousArrayTy Offsets;
7408       MapNonContiguousArrayTy Counts;
7409       MapNonContiguousArrayTy Strides;
7410     };
7411     MapExprsArrayTy Exprs;
7412     MapBaseValuesArrayTy BasePointers;
7413     MapValuesArrayTy Pointers;
7414     MapValuesArrayTy Sizes;
7415     MapFlagsArrayTy Types;
7416     MapMappersArrayTy Mappers;
7417     StructNonContiguousInfo NonContigInfo;
7418 
7419     /// Append arrays in \a CurInfo.
7420     void append(MapCombinedInfoTy &CurInfo) {
7421       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7422       BasePointers.append(CurInfo.BasePointers.begin(),
7423                           CurInfo.BasePointers.end());
7424       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7425       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7426       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7427       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7428       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7429                                  CurInfo.NonContigInfo.Dims.end());
7430       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7431                                     CurInfo.NonContigInfo.Offsets.end());
7432       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7433                                    CurInfo.NonContigInfo.Counts.end());
7434       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7435                                     CurInfo.NonContigInfo.Strides.end());
7436     }
7437   };
7438 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Combined map info collected ahead of time for the struct's members
    // (name suggests a preliminary pass — confirm against the code that
    // populates it).
    MapCombinedInfoTy PreliminaryMapData;
    // Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the struct itself.
    Address Base = Address::invalid();
    // Lower-bound address used for the combined entry.
    Address LB = Address::invalid();
    // Whether an array section participates in the mapped range.
    bool IsArraySection = false;
    // Whether the whole record (not just a subset of fields) is mapped.
    bool HasCompleteRecord = false;
  };
7454 
private:
  /// Holds everything needed to process one mappable-expression component
  /// list: the components, the map/motion modifiers of the originating
  /// clause, and bookkeeping flags.
  struct MapInfo {
    /// Components of the mappable expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the originating clause (to/from/tofrom/alloc/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (always, close, present, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers (e.g. 'present') from the originating clause.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the mapping is implicit rather than user-written.
    bool IsImplicit = false;
    /// User-defined mapper attached to this mapping, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original expression from the clause, if any.
    const Expr *VarRef = nullptr;
    /// Presumably distinguishes use_device_addr from use_device_ptr
    /// handling — confirm against callers.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7482 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression identifying the deferred entry.
    const Expr *IE = nullptr;
    /// Declaration the use_device_ptr/use_device_addr clause refers to.
    const ValueDecl *VD = nullptr;
    /// True if the entry came from use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7495 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause that mapped them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7519 
  /// Compute the number of bytes to map for expression \p E, as a value of
  /// the target's size type. Array shaping expressions and array sections
  /// need special handling because their byte size is not the size of the
  /// expression's own type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    // Size = pointee size * product of all dimension extents.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Dimension expressions may have any integer type; normalize to
        // size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base (a[:] form).
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element (no colon, e.g. a[i]).
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp the result to zero when the lower-bound byte offset exceeds
      // the base size so the NUW subtraction cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7594 
7595   /// Return the corresponding bits for a given map clause modifier. Add
7596   /// a flag marking the map as a pointer if requested. Add a flag marking the
7597   /// map as the first one of a series of maps that relate to the same map
7598   /// expression.
7599   OpenMPOffloadMappingFlags getMapTypeBits(
7600       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7601       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7602       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7603     OpenMPOffloadMappingFlags Bits =
7604         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7605     switch (MapType) {
7606     case OMPC_MAP_alloc:
7607     case OMPC_MAP_release:
7608       // alloc and release is the default behavior in the runtime library,  i.e.
7609       // if we don't pass any bits alloc/release that is what the runtime is
7610       // going to do. Therefore, we don't need to signal anything for these two
7611       // type modifiers.
7612       break;
7613     case OMPC_MAP_to:
7614       Bits |= OMP_MAP_TO;
7615       break;
7616     case OMPC_MAP_from:
7617       Bits |= OMP_MAP_FROM;
7618       break;
7619     case OMPC_MAP_tofrom:
7620       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7621       break;
7622     case OMPC_MAP_delete:
7623       Bits |= OMP_MAP_DELETE;
7624       break;
7625     case OMPC_MAP_unknown:
7626       llvm_unreachable("Unexpected map type!");
7627     }
7628     if (AddPtrFlag)
7629       Bits |= OMP_MAP_PTR_AND_OBJ;
7630     if (AddIsTargetParamFlag)
7631       Bits |= OMP_MAP_TARGET_PARAM;
7632     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7633       Bits |= OMP_MAP_ALWAYS;
7634     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7635       Bits |= OMP_MAP_CLOSE;
7636     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7637         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7638       Bits |= OMP_MAP_PRESENT;
7639     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7640       Bits |= OMP_MAP_OMPX_HOLD;
7641     if (IsNonContiguous)
7642       Bits |= OMP_MAP_NON_CONTIG;
7643     return Bits;
7644   }
7645 
7646   /// Return true if the provided expression is a final array section. A
7647   /// final array section, is one whose length can't be proved to be one.
7648   bool isFinalArraySectionExpression(const Expr *E) const {
7649     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7650 
7651     // It is not an array section and therefore not a unity-size one.
7652     if (!OASE)
7653       return false;
7654 
7655     // An array section with no colon always refer to a single element.
7656     if (OASE->getColonLocFirst().isInvalid())
7657       return false;
7658 
7659     const Expr *Length = OASE->getLength();
7660 
7661     // If we don't have a length we have to check if the array has size 1
7662     // for this dimension. Also, we should always expect a length if the
7663     // base type is pointer.
7664     if (!Length) {
7665       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7666                              OASE->getBase()->IgnoreParenImpCasts())
7667                              .getCanonicalType();
7668       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7669         return ATy->getSize().getSExtValue() != 1;
7670       // If we don't have a constant dimension length, we have to consider
7671       // the current section as having any size, so it is not necessarily
7672       // unitary. If it happen to be unity size, that's user fault.
7673       return true;
7674     }
7675 
7676     // Check if the length evaluates to 1.
7677     Expr::EvalResult Result;
7678     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7679       return true; // Can have more that size 1.
7680 
7681     llvm::APSInt ConstLength = Result.Val.getInt();
7682     return ConstLength.getSExtValue() != 1;
7683   }
7684 
7685   /// Generate the base pointers, section pointers, sizes, map type bits, and
7686   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7687   /// map type, map or motion modifiers, and expression components.
7688   /// \a IsFirstComponent should be set to true if the provided set of
7689   /// components is the first associated with a capture.
7690   void generateInfoForComponentList(
7691       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7692       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7693       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7694       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7695       bool IsFirstComponentList, bool IsImplicit,
7696       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7697       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7698       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7699           OverlappedElements = llvm::None) const {
7700     // The following summarizes what has to be generated for each map and the
7701     // types below. The generated information is expressed in this order:
7702     // base pointer, section pointer, size, flags
7703     // (to add to the ones that come from the map type and modifier).
7704     //
7705     // double d;
7706     // int i[100];
7707     // float *p;
7708     //
7709     // struct S1 {
7710     //   int i;
7711     //   float f[50];
7712     // }
7713     // struct S2 {
7714     //   int i;
7715     //   float f[50];
7716     //   S1 s;
7717     //   double *p;
7718     //   struct S2 *ps;
7719     //   int &ref;
7720     // }
7721     // S2 s;
7722     // S2 *ps;
7723     //
7724     // map(d)
7725     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7726     //
7727     // map(i)
7728     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7729     //
7730     // map(i[1:23])
7731     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7732     //
7733     // map(p)
7734     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7735     //
7736     // map(p[1:24])
7737     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7738     // in unified shared memory mode or for local pointers
7739     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7740     //
7741     // map(s)
7742     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7743     //
7744     // map(s.i)
7745     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7746     //
7747     // map(s.s.f)
7748     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7749     //
7750     // map(s.p)
7751     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7752     //
7753     // map(to: s.p[:22])
7754     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7755     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7756     // &(s.p), &(s.p[0]), 22*sizeof(double),
7757     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7758     // (*) alloc space for struct members, only this is a target parameter
7759     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7760     //      optimizes this entry out, same in the examples below)
7761     // (***) map the pointee (map: to)
7762     //
7763     // map(to: s.ref)
7764     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7765     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7766     // (*) alloc space for struct members, only this is a target parameter
7767     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7768     //      optimizes this entry out, same in the examples below)
7769     // (***) map the pointee (map: to)
7770     //
7771     // map(s.ps)
7772     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7773     //
7774     // map(from: s.ps->s.i)
7775     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7776     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7777     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7778     //
7779     // map(to: s.ps->ps)
7780     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7781     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7782     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7783     //
7784     // map(s.ps->ps->ps)
7785     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7786     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7787     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7788     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7789     //
7790     // map(to: s.ps->ps->s.f[:22])
7791     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7792     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7793     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7794     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7795     //
7796     // map(ps)
7797     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7798     //
7799     // map(ps->i)
7800     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7801     //
7802     // map(ps->s.f)
7803     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7804     //
7805     // map(from: ps->p)
7806     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7807     //
7808     // map(to: ps->p[:22])
7809     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7810     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7811     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7812     //
7813     // map(ps->ps)
7814     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7815     //
7816     // map(from: ps->ps->s.i)
7817     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7818     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7819     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7820     //
7821     // map(from: ps->ps->ps)
7822     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7823     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7824     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7825     //
7826     // map(ps->ps->ps->ps)
7827     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7828     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7829     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7830     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7831     //
7832     // map(to: ps->ps->ps->s.f[:22])
7833     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7834     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7835     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7836     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7837     //
7838     // map(to: s.f[:22]) map(from: s.p[:33])
7839     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7840     //     sizeof(double*) (**), TARGET_PARAM
7841     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7842     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7843     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7844     // (*) allocate contiguous space needed to fit all mapped members even if
7845     //     we allocate space for members not mapped (in this example,
7846     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7847     //     them as well because they fall between &s.f[0] and &s.p)
7848     //
7849     // map(from: s.f[:22]) map(to: ps->p[:33])
7850     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7851     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7852     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7853     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7854     // (*) the struct this entry pertains to is the 2nd element in the list of
7855     //     arguments, hence MEMBER_OF(2)
7856     //
7857     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7858     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7859     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7860     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7861     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7862     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7863     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7864     // (*) the struct this entry pertains to is the 4th element in the list
7865     //     of arguments, hence MEMBER_OF(4)
7866 
7867     // Track if the map information being generated is the first for a capture.
7868     bool IsCaptureFirstInfo = IsFirstComponentList;
7869     // When the variable is on a declare target link or in a to clause with
7870     // unified memory, a reference is needed to hold the host/device address
7871     // of the variable.
7872     bool RequiresReference = false;
7873 
7874     // Scan the components from the base to the complete expression.
7875     auto CI = Components.rbegin();
7876     auto CE = Components.rend();
7877     auto I = CI;
7878 
7879     // Track if the map information being generated is the first for a list of
7880     // components.
7881     bool IsExpressionFirstInfo = true;
7882     bool FirstPointerInComplexData = false;
7883     Address BP = Address::invalid();
7884     const Expr *AssocExpr = I->getAssociatedExpression();
7885     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7886     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7887     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7888 
7889     if (isa<MemberExpr>(AssocExpr)) {
7890       // The base is the 'this' pointer. The content of the pointer is going
7891       // to be the base of the field being mapped.
7892       BP = CGF.LoadCXXThisAddress();
7893     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7894                (OASE &&
7895                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7896       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7897     } else if (OAShE &&
7898                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7899       BP = Address(
7900           CGF.EmitScalarExpr(OAShE->getBase()),
7901           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7902     } else {
7903       // The base is the reference to the variable.
7904       // BP = &Var.
7905       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7906       if (const auto *VD =
7907               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7908         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7909                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7910           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7911               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7912                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7913             RequiresReference = true;
7914             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7915           }
7916         }
7917       }
7918 
7919       // If the variable is a pointer and is being dereferenced (i.e. is not
7920       // the last component), the base has to be the pointer itself, not its
7921       // reference. References are ignored for mapping purposes.
7922       QualType Ty =
7923           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7924       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7925         // No need to generate individual map information for the pointer, it
7926         // can be associated with the combined storage if shared memory mode is
7927         // active or the base declaration is not global variable.
7928         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7929         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7930             !VD || VD->hasLocalStorage())
7931           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7932         else
7933           FirstPointerInComplexData = true;
7934         ++I;
7935       }
7936     }
7937 
7938     // Track whether a component of the list should be marked as MEMBER_OF some
7939     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7940     // in a component list should be marked as MEMBER_OF, all subsequent entries
7941     // do not belong to the base struct. E.g.
7942     // struct S2 s;
7943     // s.ps->ps->ps->f[:]
7944     //   (1) (2) (3) (4)
7945     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7946     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7947     // is the pointee of ps(2) which is not member of struct s, so it should not
7948     // be marked as such (it is still PTR_AND_OBJ).
7949     // The variable is initialized to false so that PTR_AND_OBJ entries which
7950     // are not struct members are not considered (e.g. array of pointers to
7951     // data).
7952     bool ShouldBeMemberOf = false;
7953 
7954     // Variable keeping track of whether or not we have encountered a component
7955     // in the component list which is a member expression. Useful when we have a
7956     // pointer or a final array section, in which case it is the previous
7957     // component in the list which tells us whether we have a member expression.
7958     // E.g. X.f[:]
7959     // While processing the final array section "[:]" it is "f" which tells us
7960     // whether we are dealing with a member of a declared struct.
7961     const MemberExpr *EncounteredME = nullptr;
7962 
7963     // Track for the total number of dimension. Start from one for the dummy
7964     // dimension.
7965     uint64_t DimSize = 1;
7966 
7967     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7968     bool IsPrevMemberReference = false;
7969 
7970     for (; I != CE; ++I) {
7971       // If the current component is member of a struct (parent struct) mark it.
7972       if (!EncounteredME) {
7973         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7974         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7975         // as MEMBER_OF the parent struct.
7976         if (EncounteredME) {
7977           ShouldBeMemberOf = true;
7978           // Do not emit as complex pointer if this is actually not array-like
7979           // expression.
7980           if (FirstPointerInComplexData) {
7981             QualType Ty = std::prev(I)
7982                               ->getAssociatedDeclaration()
7983                               ->getType()
7984                               .getNonReferenceType();
7985             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7986             FirstPointerInComplexData = false;
7987           }
7988         }
7989       }
7990 
7991       auto Next = std::next(I);
7992 
7993       // We need to generate the addresses and sizes if this is the last
7994       // component, if the component is a pointer or if it is an array section
7995       // whose length can't be proved to be one. If this is a pointer, it
7996       // becomes the base address for the following components.
7997 
7998       // A final array section, is one whose length can't be proved to be one.
7999       // If the map item is non-contiguous then we don't treat any array section
8000       // as final array section.
8001       bool IsFinalArraySection =
8002           !IsNonContiguous &&
8003           isFinalArraySectionExpression(I->getAssociatedExpression());
8004 
8005       // If we have a declaration for the mapping use that, otherwise use
8006       // the base declaration of the map clause.
8007       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8008                                      ? I->getAssociatedDeclaration()
8009                                      : BaseDecl;
8010       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8011                                                : MapExpr;
8012 
8013       // Get information on whether the element is a pointer. Have to do a
8014       // special treatment for array sections given that they are built-in
8015       // types.
8016       const auto *OASE =
8017           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8018       const auto *OAShE =
8019           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8020       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8021       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8022       bool IsPointer =
8023           OAShE ||
8024           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8025                        .getCanonicalType()
8026                        ->isAnyPointerType()) ||
8027           I->getAssociatedExpression()->getType()->isAnyPointerType();
8028       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8029                                MapDecl &&
8030                                MapDecl->getType()->isLValueReferenceType();
8031       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8032 
8033       if (OASE)
8034         ++DimSize;
8035 
8036       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8037           IsFinalArraySection) {
8038         // If this is not the last component, we expect the pointer to be
8039         // associated with an array expression or member expression.
8040         assert((Next == CE ||
8041                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8042                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8043                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8044                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8045                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8046                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8047                "Unexpected expression");
8048 
8049         Address LB = Address::invalid();
8050         Address LowestElem = Address::invalid();
8051         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8052                                        const MemberExpr *E) {
8053           const Expr *BaseExpr = E->getBase();
8054           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8055           // scalar.
8056           LValue BaseLV;
8057           if (E->isArrow()) {
8058             LValueBaseInfo BaseInfo;
8059             TBAAAccessInfo TBAAInfo;
8060             Address Addr =
8061                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8062             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8063             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8064           } else {
8065             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8066           }
8067           return BaseLV;
8068         };
8069         if (OAShE) {
8070           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8071                                     CGF.getContext().getTypeAlignInChars(
8072                                         OAShE->getBase()->getType()));
8073         } else if (IsMemberReference) {
8074           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8075           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8076           LowestElem = CGF.EmitLValueForFieldInitialization(
8077                               BaseLVal, cast<FieldDecl>(MapDecl))
8078                            .getAddress(CGF);
8079           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8080                    .getAddress(CGF);
8081         } else {
8082           LowestElem = LB =
8083               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8084                   .getAddress(CGF);
8085         }
8086 
8087         // If this component is a pointer inside the base struct then we don't
8088         // need to create any entry for it - it will be combined with the object
8089         // it is pointing to into a single PTR_AND_OBJ entry.
8090         bool IsMemberPointerOrAddr =
8091             EncounteredME &&
8092             (((IsPointer || ForDeviceAddr) &&
8093               I->getAssociatedExpression() == EncounteredME) ||
8094              (IsPrevMemberReference && !IsPointer) ||
8095              (IsMemberReference && Next != CE &&
8096               !Next->getAssociatedExpression()->getType()->isPointerType()));
8097         if (!OverlappedElements.empty() && Next == CE) {
8098           // Handle base element with the info for overlapped elements.
8099           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8100           assert(!IsPointer &&
8101                  "Unexpected base element with the pointer type.");
8102           // Mark the whole struct as the struct that requires allocation on the
8103           // device.
8104           PartialStruct.LowestElem = {0, LowestElem};
8105           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8106               I->getAssociatedExpression()->getType());
8107           Address HB = CGF.Builder.CreateConstGEP(
8108               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8109                                                               CGF.VoidPtrTy),
8110               TypeSize.getQuantity() - 1);
8111           PartialStruct.HighestElem = {
8112               std::numeric_limits<decltype(
8113                   PartialStruct.HighestElem.first)>::max(),
8114               HB};
8115           PartialStruct.Base = BP;
8116           PartialStruct.LB = LB;
8117           assert(
8118               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8119               "Overlapped elements must be used only once for the variable.");
8120           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8121           // Emit data for non-overlapped data.
8122           OpenMPOffloadMappingFlags Flags =
8123               OMP_MAP_MEMBER_OF |
8124               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8125                              /*AddPtrFlag=*/false,
8126                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8127           llvm::Value *Size = nullptr;
8128           // Do bitcopy of all non-overlapped structure elements.
8129           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8130                    Component : OverlappedElements) {
8131             Address ComponentLB = Address::invalid();
8132             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8133                  Component) {
8134               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8135                 const auto *FD = dyn_cast<FieldDecl>(VD);
8136                 if (FD && FD->getType()->isLValueReferenceType()) {
8137                   const auto *ME =
8138                       cast<MemberExpr>(MC.getAssociatedExpression());
8139                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8140                   ComponentLB =
8141                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8142                           .getAddress(CGF);
8143                 } else {
8144                   ComponentLB =
8145                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8146                           .getAddress(CGF);
8147                 }
8148                 Size = CGF.Builder.CreatePtrDiff(
8149                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8150                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8151                 break;
8152               }
8153             }
8154             assert(Size && "Failed to determine structure size");
8155             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8156             CombinedInfo.BasePointers.push_back(BP.getPointer());
8157             CombinedInfo.Pointers.push_back(LB.getPointer());
8158             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8159                 Size, CGF.Int64Ty, /*isSigned=*/true));
8160             CombinedInfo.Types.push_back(Flags);
8161             CombinedInfo.Mappers.push_back(nullptr);
8162             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8163                                                                       : 1);
8164             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8165           }
8166           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8167           CombinedInfo.BasePointers.push_back(BP.getPointer());
8168           CombinedInfo.Pointers.push_back(LB.getPointer());
8169           Size = CGF.Builder.CreatePtrDiff(
8170               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8171               CGF.EmitCastToVoidPtr(LB.getPointer()));
8172           CombinedInfo.Sizes.push_back(
8173               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8174           CombinedInfo.Types.push_back(Flags);
8175           CombinedInfo.Mappers.push_back(nullptr);
8176           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8177                                                                     : 1);
8178           break;
8179         }
8180         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8181         if (!IsMemberPointerOrAddr ||
8182             (Next == CE && MapType != OMPC_MAP_unknown)) {
8183           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8184           CombinedInfo.BasePointers.push_back(BP.getPointer());
8185           CombinedInfo.Pointers.push_back(LB.getPointer());
8186           CombinedInfo.Sizes.push_back(
8187               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8188           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8189                                                                     : 1);
8190 
8191           // If Mapper is valid, the last component inherits the mapper.
8192           bool HasMapper = Mapper && Next == CE;
8193           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8194 
8195           // We need to add a pointer flag for each map that comes from the
8196           // same expression except for the first one. We also need to signal
8197           // this map is the first one that relates with the current capture
8198           // (there is a set of entries for each capture).
8199           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8200               MapType, MapModifiers, MotionModifiers, IsImplicit,
8201               !IsExpressionFirstInfo || RequiresReference ||
8202                   FirstPointerInComplexData || IsMemberReference,
8203               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8204 
8205           if (!IsExpressionFirstInfo || IsMemberReference) {
8206             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8207             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8208             if (IsPointer || (IsMemberReference && Next != CE))
8209               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8210                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8211 
8212             if (ShouldBeMemberOf) {
8213               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8214               // should be later updated with the correct value of MEMBER_OF.
8215               Flags |= OMP_MAP_MEMBER_OF;
8216               // From now on, all subsequent PTR_AND_OBJ entries should not be
8217               // marked as MEMBER_OF.
8218               ShouldBeMemberOf = false;
8219             }
8220           }
8221 
8222           CombinedInfo.Types.push_back(Flags);
8223         }
8224 
8225         // If we have encountered a member expression so far, keep track of the
8226         // mapped member. If the parent is "*this", then the value declaration
8227         // is nullptr.
8228         if (EncounteredME) {
8229           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8230           unsigned FieldIndex = FD->getFieldIndex();
8231 
8232           // Update info about the lowest and highest elements for this struct
8233           if (!PartialStruct.Base.isValid()) {
8234             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8235             if (IsFinalArraySection) {
8236               Address HB =
8237                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8238                       .getAddress(CGF);
8239               PartialStruct.HighestElem = {FieldIndex, HB};
8240             } else {
8241               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8242             }
8243             PartialStruct.Base = BP;
8244             PartialStruct.LB = BP;
8245           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8246             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8247           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8248             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8249           }
8250         }
8251 
8252         // Need to emit combined struct for array sections.
8253         if (IsFinalArraySection || IsNonContiguous)
8254           PartialStruct.IsArraySection = true;
8255 
8256         // If we have a final array section, we are done with this expression.
8257         if (IsFinalArraySection)
8258           break;
8259 
8260         // The pointer becomes the base for the next element.
8261         if (Next != CE)
8262           BP = IsMemberReference ? LowestElem : LB;
8263 
8264         IsExpressionFirstInfo = false;
8265         IsCaptureFirstInfo = false;
8266         FirstPointerInComplexData = false;
8267         IsPrevMemberReference = IsMemberReference;
8268       } else if (FirstPointerInComplexData) {
8269         QualType Ty = Components.rbegin()
8270                           ->getAssociatedDeclaration()
8271                           ->getType()
8272                           .getNonReferenceType();
8273         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8274         FirstPointerInComplexData = false;
8275       }
8276     }
8277     // If ran into the whole component - allocate the space for the whole
8278     // record.
8279     if (!EncounteredME)
8280       PartialStruct.HasCompleteRecord = true;
8281 
8282     if (!IsNonContiguous)
8283       return;
8284 
8285     const ASTContext &Context = CGF.getContext();
8286 
8287     // For supporting stride in array section, we need to initialize the first
8288     // dimension size as 1, first offset as 0, and first count as 1
8289     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8290     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8291     MapValuesArrayTy CurStrides;
8292     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8293     uint64_t ElementTypeSize;
8294 
8295     // Collect Size information for each dimension and get the element size as
8296     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8298     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8299          Components) {
8300       const Expr *AssocExpr = Component.getAssociatedExpression();
8301       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8302 
8303       if (!OASE)
8304         continue;
8305 
8306       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8307       auto *CAT = Context.getAsConstantArrayType(Ty);
8308       auto *VAT = Context.getAsVariableArrayType(Ty);
8309 
8310       // We need all the dimension size except for the last dimension.
8311       assert((VAT || CAT || &Component == &*Components.begin()) &&
8312              "Should be either ConstantArray or VariableArray if not the "
8313              "first Component");
8314 
8315       // Get element size if CurStrides is empty.
8316       if (CurStrides.empty()) {
8317         const Type *ElementType = nullptr;
8318         if (CAT)
8319           ElementType = CAT->getElementType().getTypePtr();
8320         else if (VAT)
8321           ElementType = VAT->getElementType().getTypePtr();
8322         else
8323           assert(&Component == &*Components.begin() &&
8324                  "Only expect pointer (non CAT or VAT) when this is the "
8325                  "first Component");
8326         // If ElementType is null, then it means the base is a pointer
8327         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8328         // for next iteration.
8329         if (ElementType) {
8330           // For the case that having pointer as base, we need to remove one
8331           // level of indirection.
8332           if (&Component != &*Components.begin())
8333             ElementType = ElementType->getPointeeOrArrayElementType();
8334           ElementTypeSize =
8335               Context.getTypeSizeInChars(ElementType).getQuantity();
8336           CurStrides.push_back(
8337               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8338         }
8339       }
8340       // Get dimension value except for the last dimension since we don't need
8341       // it.
8342       if (DimSizes.size() < Components.size() - 1) {
8343         if (CAT)
8344           DimSizes.push_back(llvm::ConstantInt::get(
8345               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8346         else if (VAT)
8347           DimSizes.push_back(CGF.Builder.CreateIntCast(
8348               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8349               /*IsSigned=*/false));
8350       }
8351     }
8352 
    // Skip the dummy dimension since we already have its information.
8354     auto DI = DimSizes.begin() + 1;
8355     // Product of dimension.
8356     llvm::Value *DimProd =
8357         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8358 
8359     // Collect info for non-contiguous. Notice that offset, count, and stride
8360     // are only meaningful for array-section, so we insert a null for anything
8361     // other than array-section.
8362     // Also, the size of offset, count, and stride are not the same as
8363     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8364     // count, and stride are the same as the number of non-contiguous
8365     // declaration in target update to/from clause.
8366     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8367          Components) {
8368       const Expr *AssocExpr = Component.getAssociatedExpression();
8369 
8370       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8371         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8372             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8373             /*isSigned=*/false);
8374         CurOffsets.push_back(Offset);
8375         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8376         CurStrides.push_back(CurStrides.back());
8377         continue;
8378       }
8379 
8380       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8381 
8382       if (!OASE)
8383         continue;
8384 
8385       // Offset
8386       const Expr *OffsetExpr = OASE->getLowerBound();
8387       llvm::Value *Offset = nullptr;
8388       if (!OffsetExpr) {
8389         // If offset is absent, then we just set it to zero.
8390         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8391       } else {
8392         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8393                                            CGF.Int64Ty,
8394                                            /*isSigned=*/false);
8395       }
8396       CurOffsets.push_back(Offset);
8397 
8398       // Count
8399       const Expr *CountExpr = OASE->getLength();
8400       llvm::Value *Count = nullptr;
8401       if (!CountExpr) {
8402         // In Clang, once a high dimension is an array section, we construct all
8403         // the lower dimension as array section, however, for case like
8404         // arr[0:2][2], Clang construct the inner dimension as an array section
8405         // but it actually is not in an array section form according to spec.
8406         if (!OASE->getColonLocFirst().isValid() &&
8407             !OASE->getColonLocSecond().isValid()) {
8408           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8409         } else {
8410           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8411           // When the length is absent it defaults to ⌈(size −
8412           // lower-bound)/stride⌉, where size is the size of the array
8413           // dimension.
8414           const Expr *StrideExpr = OASE->getStride();
8415           llvm::Value *Stride =
8416               StrideExpr
8417                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8418                                               CGF.Int64Ty, /*isSigned=*/false)
8419                   : nullptr;
8420           if (Stride)
8421             Count = CGF.Builder.CreateUDiv(
8422                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8423           else
8424             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8425         }
8426       } else {
8427         Count = CGF.EmitScalarExpr(CountExpr);
8428       }
8429       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8430       CurCounts.push_back(Count);
8431 
8432       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8433       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8434       //              Offset      Count     Stride
8435       //    D0          0           1         4    (int)    <- dummy dimension
8436       //    D1          0           2         8    (2 * (1) * 4)
8437       //    D2          1           2         20   (1 * (1 * 5) * 4)
8438       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8439       const Expr *StrideExpr = OASE->getStride();
8440       llvm::Value *Stride =
8441           StrideExpr
8442               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8443                                           CGF.Int64Ty, /*isSigned=*/false)
8444               : nullptr;
8445       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8446       if (Stride)
8447         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8448       else
8449         CurStrides.push_back(DimProd);
8450       if (DI != DimSizes.end())
8451         ++DI;
8452     }
8453 
8454     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8455     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8456     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8457   }
8458 
8459   /// Return the adjusted map modifiers if the declaration a capture refers to
8460   /// appears in a first-private clause. This is expected to be used only with
8461   /// directives that start with 'target'.
8462   MappableExprsHandler::OpenMPOffloadMappingFlags
8463   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8464     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8465 
8466     // A first private variable captured by reference will use only the
8467     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8468     // declaration is known as first-private in this handler.
8469     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8470       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8471         return MappableExprsHandler::OMP_MAP_TO |
8472                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8473       return MappableExprsHandler::OMP_MAP_PRIVATE |
8474              MappableExprsHandler::OMP_MAP_TO;
8475     }
8476     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8477     if (I != LambdasMap.end())
8478       // for map(to: lambda): using user specified map type.
8479       return getMapTypeBits(
8480           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8481           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8482           /*AddPtrFlag=*/false,
8483           /*AddIsTargetParamFlag=*/false,
8484           /*isNonContiguous=*/false);
8485     return MappableExprsHandler::OMP_MAP_TO |
8486            MappableExprsHandler::OMP_MAP_FROM;
8487   }
8488 
8489   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8490     // Rotate by getFlagMemberOffset() bits.
8491     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8492                                                   << getFlagMemberOffset());
8493   }
8494 
8495   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8496                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8497     // If the entry is PTR_AND_OBJ but has not been marked with the special
8498     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8499     // marked as MEMBER_OF.
8500     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8501         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8502       return;
8503 
8504     // Reset the placeholder value to prepare the flag for the assignment of the
8505     // proper MEMBER_OF value.
8506     Flags &= ~OMP_MAP_MEMBER_OF;
8507     Flags |= MemberOfFlag;
8508   }
8509 
  /// Flatten the layout of record \p RD into \p Layout: non-virtual bases,
  /// virtual bases, and non-bitfield fields, ordered by the field numbering
  /// of the LLVM struct type CodeGen produced for the record. Bases are
  /// expanded recursively, so the result contains only FieldDecls.
  /// \param RD record whose layout is flattened.
  /// \param Layout output vector the fields are appended to.
  /// \param AsBase if true, use the base-subobject LLVM type of \p RD rather
  ///        than the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    // Pick the LLVM type whose element numbering matches the field indices
    // returned by the CGRecordLayout queries below.
    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot is filled with the base
    // class or field that occupies it. Slots that receive no decl stay null
    // and are skipped in the final emission loop.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Keep the first occupant if this slot was already taken above.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected slots in order, recursing into base classes so that
    // Layout ends up containing only FieldDecls.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8569 
8570   /// Generate all the base pointers, section pointers, sizes, map types, and
8571   /// mappers for the extracted mappable expressions (all included in \a
8572   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8573   /// pair of the relevant declaration and index where it occurs is appended to
8574   /// the device pointers info array.
8575   void generateAllInfoForClauses(
8576       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8577       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8578           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8579     // We have to process the component lists that relate with the same
8580     // declaration in a single chunk so that we can generate the map flags
8581     // correctly. Therefore, we organize all lists in a map.
8582     enum MapKind { Present, Allocs, Other, Total };
8583     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8584                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8585         Info;
8586 
8587     // Helper function to fill the information map for the different supported
8588     // clauses.
8589     auto &&InfoGen =
8590         [&Info, &SkipVarSet](
8591             const ValueDecl *D, MapKind Kind,
8592             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8593             OpenMPMapClauseKind MapType,
8594             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8595             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8596             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8597             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8598           if (SkipVarSet.contains(D))
8599             return;
8600           auto It = Info.find(D);
8601           if (It == Info.end())
8602             It = Info
8603                      .insert(std::make_pair(
8604                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8605                      .first;
8606           It->second[Kind].emplace_back(
8607               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8608               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8609         };
8610 
8611     for (const auto *Cl : Clauses) {
8612       const auto *C = dyn_cast<OMPMapClause>(Cl);
8613       if (!C)
8614         continue;
8615       MapKind Kind = Other;
8616       if (llvm::is_contained(C->getMapTypeModifiers(),
8617                              OMPC_MAP_MODIFIER_present))
8618         Kind = Present;
8619       else if (C->getMapType() == OMPC_MAP_alloc)
8620         Kind = Allocs;
8621       const auto *EI = C->getVarRefs().begin();
8622       for (const auto L : C->component_lists()) {
8623         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8624         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8625                 C->getMapTypeModifiers(), llvm::None,
8626                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8627                 E);
8628         ++EI;
8629       }
8630     }
8631     for (const auto *Cl : Clauses) {
8632       const auto *C = dyn_cast<OMPToClause>(Cl);
8633       if (!C)
8634         continue;
8635       MapKind Kind = Other;
8636       if (llvm::is_contained(C->getMotionModifiers(),
8637                              OMPC_MOTION_MODIFIER_present))
8638         Kind = Present;
8639       const auto *EI = C->getVarRefs().begin();
8640       for (const auto L : C->component_lists()) {
8641         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8642                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8643                 C->isImplicit(), std::get<2>(L), *EI);
8644         ++EI;
8645       }
8646     }
8647     for (const auto *Cl : Clauses) {
8648       const auto *C = dyn_cast<OMPFromClause>(Cl);
8649       if (!C)
8650         continue;
8651       MapKind Kind = Other;
8652       if (llvm::is_contained(C->getMotionModifiers(),
8653                              OMPC_MOTION_MODIFIER_present))
8654         Kind = Present;
8655       const auto *EI = C->getVarRefs().begin();
8656       for (const auto L : C->component_lists()) {
8657         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8658                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8659                 C->isImplicit(), std::get<2>(L), *EI);
8660         ++EI;
8661       }
8662     }
8663 
8664     // Look at the use_device_ptr clause information and mark the existing map
8665     // entries as such. If there is no map information for an entry in the
8666     // use_device_ptr list, we create one with map type 'alloc' and zero size
8667     // section. It is the user fault if that was not mapped before. If there is
8668     // no map information and the pointer is a struct member, then we defer the
8669     // emission of that entry until the whole struct has been processed.
8670     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8671                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8672         DeferredInfo;
8673     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8674 
8675     for (const auto *Cl : Clauses) {
8676       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8677       if (!C)
8678         continue;
8679       for (const auto L : C->component_lists()) {
8680         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8681             std::get<1>(L);
8682         assert(!Components.empty() &&
8683                "Not expecting empty list of components!");
8684         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8685         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8686         const Expr *IE = Components.back().getAssociatedExpression();
8687         // If the first component is a member expression, we have to look into
8688         // 'this', which maps to null in the map of map information. Otherwise
8689         // look directly for the information.
8690         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8691 
8692         // We potentially have map information for this declaration already.
8693         // Look for the first set of components that refer to it.
8694         if (It != Info.end()) {
8695           bool Found = false;
8696           for (auto &Data : It->second) {
8697             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8698               return MI.Components.back().getAssociatedDeclaration() == VD;
8699             });
8700             // If we found a map entry, signal that the pointer has to be
8701             // returned and move on to the next declaration. Exclude cases where
8702             // the base pointer is mapped as array subscript, array section or
8703             // array shaping. The base address is passed as a pointer to base in
8704             // this case and cannot be used as a base for use_device_ptr list
8705             // item.
8706             if (CI != Data.end()) {
8707               auto PrevCI = std::next(CI->Components.rbegin());
8708               const auto *VarD = dyn_cast<VarDecl>(VD);
8709               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8710                   isa<MemberExpr>(IE) ||
8711                   !VD->getType().getNonReferenceType()->isPointerType() ||
8712                   PrevCI == CI->Components.rend() ||
8713                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8714                   VarD->hasLocalStorage()) {
8715                 CI->ReturnDevicePointer = true;
8716                 Found = true;
8717                 break;
8718               }
8719             }
8720           }
8721           if (Found)
8722             continue;
8723         }
8724 
8725         // We didn't find any match in our map information - generate a zero
8726         // size array section - if the pointer is a struct member we defer this
8727         // action until the whole struct has been processed.
8728         if (isa<MemberExpr>(IE)) {
8729           // Insert the pointer into Info to be processed by
8730           // generateInfoForComponentList. Because it is a member pointer
8731           // without a pointee, no entry will be generated for it, therefore
8732           // we need to generate one after the whole struct has been processed.
8733           // Nonetheless, generateInfoForComponentList must be called to take
8734           // the pointer into account for the calculation of the range of the
8735           // partial struct.
8736           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8737                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8738                   nullptr);
8739           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8740         } else {
8741           llvm::Value *Ptr =
8742               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8743           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8744           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8745           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8746           UseDevicePtrCombinedInfo.Sizes.push_back(
8747               llvm::Constant::getNullValue(CGF.Int64Ty));
8748           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8749           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8750         }
8751       }
8752     }
8753 
8754     // Look at the use_device_addr clause information and mark the existing map
8755     // entries as such. If there is no map information for an entry in the
8756     // use_device_addr list, we create one with map type 'alloc' and zero size
8757     // section. It is the user fault if that was not mapped before. If there is
8758     // no map information and the pointer is a struct member, then we defer the
8759     // emission of that entry until the whole struct has been processed.
8760     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8761     for (const auto *Cl : Clauses) {
8762       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8763       if (!C)
8764         continue;
8765       for (const auto L : C->component_lists()) {
8766         assert(!std::get<1>(L).empty() &&
8767                "Not expecting empty list of components!");
8768         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8769         if (!Processed.insert(VD).second)
8770           continue;
8771         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8772         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8773         // If the first component is a member expression, we have to look into
8774         // 'this', which maps to null in the map of map information. Otherwise
8775         // look directly for the information.
8776         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8777 
8778         // We potentially have map information for this declaration already.
8779         // Look for the first set of components that refer to it.
8780         if (It != Info.end()) {
8781           bool Found = false;
8782           for (auto &Data : It->second) {
8783             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8784               return MI.Components.back().getAssociatedDeclaration() == VD;
8785             });
8786             // If we found a map entry, signal that the pointer has to be
8787             // returned and move on to the next declaration.
8788             if (CI != Data.end()) {
8789               CI->ReturnDevicePointer = true;
8790               Found = true;
8791               break;
8792             }
8793           }
8794           if (Found)
8795             continue;
8796         }
8797 
8798         // We didn't find any match in our map information - generate a zero
8799         // size array section - if the pointer is a struct member we defer this
8800         // action until the whole struct has been processed.
8801         if (isa<MemberExpr>(IE)) {
8802           // Insert the pointer into Info to be processed by
8803           // generateInfoForComponentList. Because it is a member pointer
8804           // without a pointee, no entry will be generated for it, therefore
8805           // we need to generate one after the whole struct has been processed.
8806           // Nonetheless, generateInfoForComponentList must be called to take
8807           // the pointer into account for the calculation of the range of the
8808           // partial struct.
8809           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8810                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8811                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8812           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8813         } else {
8814           llvm::Value *Ptr;
8815           if (IE->isGLValue())
8816             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8817           else
8818             Ptr = CGF.EmitScalarExpr(IE);
8819           CombinedInfo.Exprs.push_back(VD);
8820           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8821           CombinedInfo.Pointers.push_back(Ptr);
8822           CombinedInfo.Sizes.push_back(
8823               llvm::Constant::getNullValue(CGF.Int64Ty));
8824           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8825           CombinedInfo.Mappers.push_back(nullptr);
8826         }
8827       }
8828     }
8829 
8830     for (const auto &Data : Info) {
8831       StructRangeInfoTy PartialStruct;
8832       // Temporary generated information.
8833       MapCombinedInfoTy CurInfo;
8834       const Decl *D = Data.first;
8835       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8836       for (const auto &M : Data.second) {
8837         for (const MapInfo &L : M) {
8838           assert(!L.Components.empty() &&
8839                  "Not expecting declaration with no component lists.");
8840 
8841           // Remember the current base pointer index.
8842           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8843           CurInfo.NonContigInfo.IsNonContiguous =
8844               L.Components.back().isNonContiguous();
8845           generateInfoForComponentList(
8846               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8847               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8848               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8849 
8850           // If this entry relates with a device pointer, set the relevant
8851           // declaration and add the 'return pointer' flag.
8852           if (L.ReturnDevicePointer) {
8853             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8854                    "Unexpected number of mapped base pointers.");
8855 
8856             const ValueDecl *RelevantVD =
8857                 L.Components.back().getAssociatedDeclaration();
8858             assert(RelevantVD &&
8859                    "No relevant declaration related with device pointer??");
8860 
8861             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8862                 RelevantVD);
8863             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8864           }
8865         }
8866       }
8867 
8868       // Append any pending zero-length pointers which are struct members and
8869       // used with use_device_ptr or use_device_addr.
8870       auto CI = DeferredInfo.find(Data.first);
8871       if (CI != DeferredInfo.end()) {
8872         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8873           llvm::Value *BasePtr;
8874           llvm::Value *Ptr;
8875           if (L.ForDeviceAddr) {
8876             if (L.IE->isGLValue())
8877               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8878             else
8879               Ptr = this->CGF.EmitScalarExpr(L.IE);
8880             BasePtr = Ptr;
8881             // Entry is RETURN_PARAM. Also, set the placeholder value
8882             // MEMBER_OF=FFFF so that the entry is later updated with the
8883             // correct value of MEMBER_OF.
8884             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8885           } else {
8886             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8887             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8888                                              L.IE->getExprLoc());
8889             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8890             // placeholder value MEMBER_OF=FFFF so that the entry is later
8891             // updated with the correct value of MEMBER_OF.
8892             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8893                                     OMP_MAP_MEMBER_OF);
8894           }
8895           CurInfo.Exprs.push_back(L.VD);
8896           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8897           CurInfo.Pointers.push_back(Ptr);
8898           CurInfo.Sizes.push_back(
8899               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8900           CurInfo.Mappers.push_back(nullptr);
8901         }
8902       }
8903       // If there is an entry in PartialStruct it means we have a struct with
8904       // individual members mapped. Emit an extra combined entry.
8905       if (PartialStruct.Base.isValid()) {
8906         CurInfo.NonContigInfo.Dims.push_back(0);
8907         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8908       }
8909 
8910       // We need to append the results of this capture to what we already
8911       // have.
8912       CombinedInfo.append(CurInfo);
8913     }
8914     // Append data for use_device_ptr clauses.
8915     CombinedInfo.append(UseDevicePtrCombinedInfo);
8916   }
8917 
8918 public:
8919   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8920       : CurDir(&Dir), CGF(CGF) {
8921     // Extract firstprivate clause information.
8922     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8923       for (const auto *D : C->varlists())
8924         FirstPrivateDecls.try_emplace(
8925             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8926     // Extract implicit firstprivates from uses_allocators clauses.
8927     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8928       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8929         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8930         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8931           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8932                                         /*Implicit=*/true);
8933         else if (const auto *VD = dyn_cast<VarDecl>(
8934                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8935                          ->getDecl()))
8936           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8937       }
8938     }
8939     // Extract device pointer clause information.
8940     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8941       for (auto L : C->component_lists())
8942         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8943     // Extract map information.
8944     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8945       if (C->getMapType() != OMPC_MAP_to)
8946         continue;
8947       for (auto L : C->component_lists()) {
8948         const ValueDecl *VD = std::get<0>(L);
8949         const auto *RD = VD ? VD->getType()
8950                                   .getCanonicalType()
8951                                   .getNonReferenceType()
8952                                   ->getAsCXXRecordDecl()
8953                             : nullptr;
8954         if (RD && RD->isLambda())
8955           LambdasMap.try_emplace(std::get<0>(L), C);
8956       }
8957     }
8958   }
8959 
  /// Constructor for the declare mapper directive. Only records the directive
  /// itself; the mapper's map clauses are processed later through
  /// generateAllInfoForMapper().
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8963 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// The combined entry spans the address range from the lowest mapped
  /// element to one past the highest mapped element, and the per-member
  /// entries in \p CurTypes are rewritten to be MEMBER_OF this new entry.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // Nothing to combine: a single entry that is not MEMBER_OF anything and
    // not an array section stands on its own.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a complete record both bounds collapse to the record's base
    // address; the +1 GEP below then covers exactly one whole record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // TARGET_PARAM is only set when we generate info for captures
    // (NotTargetParams == false); otherwise the entry carries no flags yet.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // above takes its place as the argument for the struct.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
9035 
9036   /// Generate all the base pointers, section pointers, sizes, map types, and
9037   /// mappers for the extracted mappable expressions (all included in \a
9038   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9039   /// pair of the relevant declaration and index where it occurs is appended to
9040   /// the device pointers info array.
9041   void generateAllInfo(
9042       MapCombinedInfoTy &CombinedInfo,
9043       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9044           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9045     assert(CurDir.is<const OMPExecutableDirective *>() &&
9046            "Expect a executable directive");
9047     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9048     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9049   }
9050 
9051   /// Generate all the base pointers, section pointers, sizes, map types, and
9052   /// mappers for the extracted map clauses of user-defined mapper (all included
9053   /// in \a CombinedInfo).
9054   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9055     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9056            "Expect a declare mapper directive");
9057     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9058     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9059   }
9060 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD (after stripping references) is a lambda class, append one
  /// PTR_AND_OBJ entry to \p CombinedInfo for the 'this' capture (if any) and
  /// one for every by-reference capture or captured pointer. Each emitted
  /// field address is recorded in \p LambdaPointers keyed to the lambda's own
  /// address so adjustMemberOfForLambdaCaptures can later patch the
  /// MEMBER_OF placeholder.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda captures are handled here.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer: base is the field inside the lambda,
      // pointee is the loaded 'this' value.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Note: shadows the outer VD; from here on VD is the captured variable.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need mapping.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: pointee is the referenced object, sized by
        // the captured variable's (non-reference) type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: pointee is the loaded pointer value, zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9127 
9128   /// Set correct indices for lambdas captures.
9129   void adjustMemberOfForLambdaCaptures(
9130       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9131       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9132       MapFlagsArrayTy &Types) const {
9133     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9134       // Set correct member_of idx for all implicit lambda captures.
9135       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9136                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9137         continue;
9138       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9139       assert(BasePtr && "Unable to find base lambda address.");
9140       int TgtIdx = -1;
9141       for (unsigned J = I; J > 0; --J) {
9142         unsigned Idx = J - 1;
9143         if (Pointers[Idx] != BasePtr)
9144           continue;
9145         TgtIdx = Idx;
9146         break;
9147       }
9148       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9149       // All other current entries will be MEMBER_OF the combined entry
9150       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9151       // 0xFFFF in the MEMBER_OF field).
9152       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9153       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9154     }
9155   }
9156 
9157   /// Generate the base pointers, section pointers, sizes, map types, and
9158   /// mappers associated to a given capture (all included in \a CombinedInfo).
9159   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9160                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9161                               StructRangeInfoTy &PartialStruct) const {
9162     assert(!Cap->capturesVariableArrayType() &&
9163            "Not expecting to generate map info for a variable array type!");
9164 
9165     // We need to know when we generating information for the first component
9166     const ValueDecl *VD = Cap->capturesThis()
9167                               ? nullptr
9168                               : Cap->getCapturedVar()->getCanonicalDecl();
9169 
9170     // for map(to: lambda): skip here, processing it in
9171     // generateDefaultMapInfo
9172     if (LambdasMap.count(VD))
9173       return;
9174 
9175     // If this declaration appears in a is_device_ptr clause we just have to
9176     // pass the pointer by value. If it is a reference to a declaration, we just
9177     // pass its value.
9178     if (DevPointersMap.count(VD)) {
9179       CombinedInfo.Exprs.push_back(VD);
9180       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9181       CombinedInfo.Pointers.push_back(Arg);
9182       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9183           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9184           /*isSigned=*/true));
9185       CombinedInfo.Types.push_back(
9186           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9187           OMP_MAP_TARGET_PARAM);
9188       CombinedInfo.Mappers.push_back(nullptr);
9189       return;
9190     }
9191 
9192     using MapData =
9193         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9194                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9195                    const ValueDecl *, const Expr *>;
9196     SmallVector<MapData, 4> DeclComponentLists;
9197     assert(CurDir.is<const OMPExecutableDirective *>() &&
9198            "Expect a executable directive");
9199     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9200     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9201       const auto *EI = C->getVarRefs().begin();
9202       for (const auto L : C->decl_component_lists(VD)) {
9203         const ValueDecl *VDecl, *Mapper;
9204         // The Expression is not correct if the mapping is implicit
9205         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9206         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9207         std::tie(VDecl, Components, Mapper) = L;
9208         assert(VDecl == VD && "We got information for the wrong declaration??");
9209         assert(!Components.empty() &&
9210                "Not expecting declaration with no component lists.");
9211         DeclComponentLists.emplace_back(Components, C->getMapType(),
9212                                         C->getMapTypeModifiers(),
9213                                         C->isImplicit(), Mapper, E);
9214         ++EI;
9215       }
9216     }
9217     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9218                                              const MapData &RHS) {
9219       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9220       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9221       bool HasPresent =
9222           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9223       bool HasAllocs = MapType == OMPC_MAP_alloc;
9224       MapModifiers = std::get<2>(RHS);
9225       MapType = std::get<1>(LHS);
9226       bool HasPresentR =
9227           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9228       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9229       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9230     });
9231 
9232     // Find overlapping elements (including the offset from the base element).
9233     llvm::SmallDenseMap<
9234         const MapData *,
9235         llvm::SmallVector<
9236             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9237         4>
9238         OverlappedData;
9239     size_t Count = 0;
9240     for (const MapData &L : DeclComponentLists) {
9241       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9242       OpenMPMapClauseKind MapType;
9243       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9244       bool IsImplicit;
9245       const ValueDecl *Mapper;
9246       const Expr *VarRef;
9247       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9248           L;
9249       ++Count;
9250       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9251         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9252         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9253                  VarRef) = L1;
9254         auto CI = Components.rbegin();
9255         auto CE = Components.rend();
9256         auto SI = Components1.rbegin();
9257         auto SE = Components1.rend();
9258         for (; CI != CE && SI != SE; ++CI, ++SI) {
9259           if (CI->getAssociatedExpression()->getStmtClass() !=
9260               SI->getAssociatedExpression()->getStmtClass())
9261             break;
9262           // Are we dealing with different variables/fields?
9263           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9264             break;
9265         }
9266         // Found overlapping if, at least for one component, reached the head
9267         // of the components list.
9268         if (CI == CE || SI == SE) {
9269           // Ignore it if it is the same component.
9270           if (CI == CE && SI == SE)
9271             continue;
9272           const auto It = (SI == SE) ? CI : SI;
9273           // If one component is a pointer and another one is a kind of
9274           // dereference of this pointer (array subscript, section, dereference,
9275           // etc.), it is not an overlapping.
9276           // Same, if one component is a base and another component is a
9277           // dereferenced pointer memberexpr with the same base.
9278           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9279               (std::prev(It)->getAssociatedDeclaration() &&
9280                std::prev(It)
9281                    ->getAssociatedDeclaration()
9282                    ->getType()
9283                    ->isPointerType()) ||
9284               (It->getAssociatedDeclaration() &&
9285                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9286                std::next(It) != CE && std::next(It) != SE))
9287             continue;
9288           const MapData &BaseData = CI == CE ? L : L1;
9289           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9290               SI == SE ? Components : Components1;
9291           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9292           OverlappedElements.getSecond().push_back(SubData);
9293         }
9294       }
9295     }
9296     // Sort the overlapped elements for each item.
9297     llvm::SmallVector<const FieldDecl *, 4> Layout;
9298     if (!OverlappedData.empty()) {
9299       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9300       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9301       while (BaseType != OrigType) {
9302         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9303         OrigType = BaseType->getPointeeOrArrayElementType();
9304       }
9305 
9306       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9307         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9308       else {
9309         const auto *RD = BaseType->getAsRecordDecl();
9310         Layout.append(RD->field_begin(), RD->field_end());
9311       }
9312     }
9313     for (auto &Pair : OverlappedData) {
9314       llvm::stable_sort(
9315           Pair.getSecond(),
9316           [&Layout](
9317               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9318               OMPClauseMappableExprCommon::MappableExprComponentListRef
9319                   Second) {
9320             auto CI = First.rbegin();
9321             auto CE = First.rend();
9322             auto SI = Second.rbegin();
9323             auto SE = Second.rend();
9324             for (; CI != CE && SI != SE; ++CI, ++SI) {
9325               if (CI->getAssociatedExpression()->getStmtClass() !=
9326                   SI->getAssociatedExpression()->getStmtClass())
9327                 break;
9328               // Are we dealing with different variables/fields?
9329               if (CI->getAssociatedDeclaration() !=
9330                   SI->getAssociatedDeclaration())
9331                 break;
9332             }
9333 
9334             // Lists contain the same elements.
9335             if (CI == CE && SI == SE)
9336               return false;
9337 
9338             // List with less elements is less than list with more elements.
9339             if (CI == CE || SI == SE)
9340               return CI == CE;
9341 
9342             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9343             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9344             if (FD1->getParent() == FD2->getParent())
9345               return FD1->getFieldIndex() < FD2->getFieldIndex();
9346             const auto *It =
9347                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9348                   return FD == FD1 || FD == FD2;
9349                 });
9350             return *It == FD1;
9351           });
9352     }
9353 
9354     // Associated with a capture, because the mapping flags depend on it.
9355     // Go through all of the elements with the overlapped elements.
9356     bool IsFirstComponentList = true;
9357     for (const auto &Pair : OverlappedData) {
9358       const MapData &L = *Pair.getFirst();
9359       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9360       OpenMPMapClauseKind MapType;
9361       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9362       bool IsImplicit;
9363       const ValueDecl *Mapper;
9364       const Expr *VarRef;
9365       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9366           L;
9367       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9368           OverlappedComponents = Pair.getSecond();
9369       generateInfoForComponentList(
9370           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9371           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9372           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9373       IsFirstComponentList = false;
9374     }
9375     // Go through other elements without overlapped elements.
9376     for (const MapData &L : DeclComponentLists) {
9377       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9378       OpenMPMapClauseKind MapType;
9379       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9380       bool IsImplicit;
9381       const ValueDecl *Mapper;
9382       const Expr *VarRef;
9383       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9384           L;
9385       auto It = OverlappedData.find(&L);
9386       if (It == OverlappedData.end())
9387         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9388                                      Components, CombinedInfo, PartialStruct,
9389                                      IsFirstComponentList, IsImplicit, Mapper,
9390                                      /*ForDeviceAddr=*/false, VD, VarRef);
9391       IsFirstComponentList = false;
9392     }
9393   }
9394 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to each of CombinedInfo's parallel arrays
  /// (Exprs, BasePointers, Pointers, Sizes, Types, Mappers); the arrays must
  /// stay index-aligned, so every branch below pushes to all of them.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    // Whether the final entry carries OMP_MAP_IMPLICIT. Firstprivate captures
    // recorded in FirstPrivateDecls may override this below.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Captured 'this': base and begin are both the 'this' pointer; the size
      // is the size of the pointed-to class.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate capture records whether its map is considered implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Captured by reference.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: load through the reference so the runtime
        // maps the pointee rather than the reference slot itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9467 };
9468 } // anonymous namespace
9469 
/// Emit, for every non-contiguous map entry, a stack array of per-dimension
/// descriptors (offset/count/stride) and store its address into the
/// corresponding slot of the runtime's pointers array.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  // Unsigned 64-bit to match the runtime's uint64_t fields.
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices within descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  // I walks Dims; L advances only for entries that are actually emitted.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // The stored offsets/counts/strides are reversed relative to the
      // descriptor's dimension order, hence the reversed index.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
9537 
9538 // Try to extract the base declaration from a `this->x` expression if possible.
9539 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9540   if (!E)
9541     return nullptr;
9542 
9543   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9544     if (const MemberExpr *ME =
9545             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9546       return ME->getMemberDecl();
9547   return nullptr;
9548 }
9549 
9550 /// Emit a string constant containing the names of the values mapped to the
9551 /// offloading runtime library.
9552 llvm::Constant *
9553 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9554                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9555 
9556   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9557     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9558 
9559   SourceLocation Loc;
9560   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9561     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9562       Loc = VD->getLocation();
9563     else
9564       Loc = MapExprs.getMapExpr()->getExprLoc();
9565   } else {
9566     Loc = MapExprs.getMapDecl()->getLocation();
9567   }
9568 
9569   std::string ExprName = "";
9570   if (MapExprs.getMapExpr()) {
9571     PrintingPolicy P(CGF.getContext().getLangOpts());
9572     llvm::raw_string_ostream OS(ExprName);
9573     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9574     OS.flush();
9575   } else {
9576     ExprName = MapExprs.getMapDecl()->getNameAsString();
9577   }
9578 
9579   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9580   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9581                                          PLoc.getLine(), PLoc.getColumn());
9582 }
9583 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills Info with the addresses of the emitted base-pointer, pointer, size,
/// map-type, map-name, and mapper arrays, one slot per entry in CombinedInfo.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries holding the per-entry base pointers, pointers, and
    // mapper function pointers; filled in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // For a non-contiguous entry the size slot holds the dimension
          // count; the descriptor itself is emitted later by
          // emitNonContiguousDescriptor.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one source-location string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Populate the runtime arrays: for each entry store its base pointer,
    // pointer, runtime-evaluated size (if any), and mapper function.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr/addr declaration's address was stored,
      // so the region body can read the translated pointer back.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Emit the per-dimension descriptors for non-contiguous entries, if any.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9758 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
  /// True when the map-type array for the end of the region is requested
  /// instead of the one for the beginning.
  bool ForEndCall = false;
};
} // namespace
9767 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// Each out-parameter receives either a GEP to the first element of the
/// corresponding array in Info, or a typed null pointer when there are no
/// pointers (or the array is not applicable).
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element ([N x T]* -> T*).
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // A region end call uses the 'present'-stripped map types when they were
    // generated separately.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No entries at all: every argument is a typed null pointer.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9828 
/// Check for inner distribute directive.
///
/// Given a target-related directive \a D, return the 'distribute' directive
/// nested directly inside it (possibly below an intervening 'teams'
/// directive), or nullptr if there is none.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain 'distribute' directly, or via a nested 'teams'.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Descend one more level to look inside the 'teams' region.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // These target forms cannot contain a nested 'distribute'.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // Everything else is not a valid outer directive for this query; the
    // exhaustive list keeps -Wswitch useful when new kinds are added.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9938 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D the declare-mapper declaration to emit a function for.
/// \param CGF if non-null, the emitted mapper is also associated with the
///        current function in FunctionUDMMap (so it can be processed when
///        that function finishes).
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Skip if a mapper function for this declaration was already emitted;
  // UDMMap acts as the cache.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // Restrict-qualified pointer to the mapped element type; used below to
  // iterate the array section element by element.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' construct; it is privatized
  // later so that the map clauses refer to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the one shown in the \code block above.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Build the function name from the mangled type name plus the mapper's own
  // name, i.e. ".omp_mapper.<type_name>.<mapper_id>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even when the translation unit is
  // compiled at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer: PtrBegin on loop entry; the
  // back-edge incoming value (PtrNext) is added after the body is emitted.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the pre-existing component count into the MEMBER_OF bit-field
  // position so it can be added onto each member's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Only materialize the mapping-name string when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // The tofrom case falls through from ToElseBB straight to EndBB, so the
    // unmodified MemberMapType arrives on that edge.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: the back-edge incoming value for the element PHI.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function and, if requested, remember which function it
  // was emitted for.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10219 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Initialization also triggers for a pointer-with-object mapping where
    // base != begin, even if this is not an array section.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization runs only when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion runs only when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10288 
10289 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10290     const OMPDeclareMapperDecl *D) {
10291   auto I = UDMMap.find(D);
10292   if (I != UDMMap.end())
10293     return I->second;
10294   emitUserDefinedMapper(D);
10295   return UDMMap.lookup(D);
10296 }
10297 
10298 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10299     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10300     llvm::Value *DeviceID,
10301     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10302                                      const OMPLoopDirective &D)>
10303         SizeEmitter) {
10304   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10305   const OMPExecutableDirective *TD = &D;
10306   // Get nested teams distribute kind directive, if any.
10307   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10308     TD = getNestedDistributeDirective(CGM.getContext(), D);
10309   if (!TD)
10310     return;
10311   const auto *LD = cast<OMPLoopDirective>(TD);
10312   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10313                                                          PrePostActionTy &) {
10314     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10315       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10316       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10317       CGF.EmitRuntimeCall(
10318           OMPBuilder.getOrCreateRuntimeFunction(
10319               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10320           Args);
10321     }
10322   };
10323   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10324 }
10325 
10326 void CGOpenMPRuntime::emitTargetCall(
10327     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10328     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10329     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10330     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10331                                      const OMPLoopDirective &D)>
10332         SizeEmitter) {
10333   if (!CGF.HaveInsertPoint())
10334     return;
10335 
10336   assert(OutlinedFn && "Invalid outlined function!");
10337 
10338   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10339                                  D.hasClausesOfKind<OMPNowaitClause>();
10340   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10341   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10342   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10343                                             PrePostActionTy &) {
10344     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10345   };
10346   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10347 
10348   CodeGenFunction::OMPTargetDataInfo InputInfo;
10349   llvm::Value *MapTypesArray = nullptr;
10350   llvm::Value *MapNamesArray = nullptr;
10351   // Fill up the pointer arrays and transfer execution to the device.
10352   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10353                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10354                     &CapturedVars,
10355                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10356     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10357       // Reverse offloading is not supported, so just execute on the host.
10358       if (RequiresOuterTask) {
10359         CapturedVars.clear();
10360         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10361       }
10362       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10363       return;
10364     }
10365 
10366     // On top of the arrays that were filled up, the target offloading call
10367     // takes as arguments the device id as well as the host pointer. The host
10368     // pointer is used by the runtime library to identify the current target
10369     // region, so it only has to be unique and not necessarily point to
10370     // anything. It could be the pointer to the outlined function that
10371     // implements the target region, but we aren't using that so that the
10372     // compiler doesn't need to keep that, and could therefore inline the host
10373     // function if proven worthwhile during optimization.
10374 
10375     // From this point on, we need to have an ID of the target region defined.
10376     assert(OutlinedFnID && "Invalid outlined function ID!");
10377 
10378     // Emit device ID if any.
10379     llvm::Value *DeviceID;
10380     if (Device.getPointer()) {
10381       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10382               Device.getInt() == OMPC_DEVICE_device_num) &&
10383              "Expected device_num modifier.");
10384       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10385       DeviceID =
10386           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10387     } else {
10388       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10389     }
10390 
10391     // Emit the number of elements in the offloading arrays.
10392     llvm::Value *PointerNum =
10393         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10394 
10395     // Return value of the runtime offloading call.
10396     llvm::Value *Return;
10397 
10398     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10399     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10400 
10401     // Source location for the ident struct
10402     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10403 
10404     // Emit tripcount for the target loop-based directive.
10405     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10406 
10407     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10408     // The target region is an outlined function launched by the runtime
10409     // via calls __tgt_target() or __tgt_target_teams().
10410     //
10411     // __tgt_target() launches a target region with one team and one thread,
10412     // executing a serial region.  This master thread may in turn launch
10413     // more threads within its team upon encountering a parallel region,
10414     // however, no additional teams can be launched on the device.
10415     //
10416     // __tgt_target_teams() launches a target region with one or more teams,
10417     // each with one or more threads.  This call is required for target
10418     // constructs such as:
10419     //  'target teams'
10420     //  'target' / 'teams'
10421     //  'target teams distribute parallel for'
10422     //  'target parallel'
10423     // and so on.
10424     //
10425     // Note that on the host and CPU targets, the runtime implementation of
10426     // these calls simply call the outlined function without forking threads.
10427     // The outlined functions themselves have runtime calls to
10428     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10429     // the compiler in emitTeamsCall() and emitParallelCall().
10430     //
10431     // In contrast, on the NVPTX target, the implementation of
10432     // __tgt_target_teams() launches a GPU kernel with the requested number
10433     // of teams and threads so no additional calls to the runtime are required.
10434     if (NumTeams) {
10435       // If we have NumTeams defined this means that we have an enclosed teams
10436       // region. Therefore we also expect to have NumThreads defined. These two
10437       // values should be defined in the presence of a teams directive,
10438       // regardless of having any clauses associated. If the user is using teams
10439       // but no clauses, these two values will be the default that should be
10440       // passed to the runtime library - a 32-bit integer with the value zero.
10441       assert(NumThreads && "Thread limit expression should be available along "
10442                            "with number of teams.");
10443       SmallVector<llvm::Value *> OffloadingArgs = {
10444           RTLoc,
10445           DeviceID,
10446           OutlinedFnID,
10447           PointerNum,
10448           InputInfo.BasePointersArray.getPointer(),
10449           InputInfo.PointersArray.getPointer(),
10450           InputInfo.SizesArray.getPointer(),
10451           MapTypesArray,
10452           MapNamesArray,
10453           InputInfo.MappersArray.getPointer(),
10454           NumTeams,
10455           NumThreads};
10456       if (HasNowait) {
10457         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10458         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10459         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10460         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10461         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10462         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10463       }
10464       Return = CGF.EmitRuntimeCall(
10465           OMPBuilder.getOrCreateRuntimeFunction(
10466               CGM.getModule(), HasNowait
10467                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10468                                    : OMPRTL___tgt_target_teams_mapper),
10469           OffloadingArgs);
10470     } else {
10471       SmallVector<llvm::Value *> OffloadingArgs = {
10472           RTLoc,
10473           DeviceID,
10474           OutlinedFnID,
10475           PointerNum,
10476           InputInfo.BasePointersArray.getPointer(),
10477           InputInfo.PointersArray.getPointer(),
10478           InputInfo.SizesArray.getPointer(),
10479           MapTypesArray,
10480           MapNamesArray,
10481           InputInfo.MappersArray.getPointer()};
10482       if (HasNowait) {
10483         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10484         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10485         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10486         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10487         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10488         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10489       }
10490       Return = CGF.EmitRuntimeCall(
10491           OMPBuilder.getOrCreateRuntimeFunction(
10492               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10493                                          : OMPRTL___tgt_target_mapper),
10494           OffloadingArgs);
10495     }
10496 
10497     // Check the error code and execute the host version if required.
10498     llvm::BasicBlock *OffloadFailedBlock =
10499         CGF.createBasicBlock("omp_offload.failed");
10500     llvm::BasicBlock *OffloadContBlock =
10501         CGF.createBasicBlock("omp_offload.cont");
10502     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10503     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10504 
10505     CGF.EmitBlock(OffloadFailedBlock);
10506     if (RequiresOuterTask) {
10507       CapturedVars.clear();
10508       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10509     }
10510     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10511     CGF.EmitBranch(OffloadContBlock);
10512 
10513     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10514   };
10515 
10516   // Notify that the host version must be executed.
10517   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10518                     RequiresOuterTask](CodeGenFunction &CGF,
10519                                        PrePostActionTy &) {
10520     if (RequiresOuterTask) {
10521       CapturedVars.clear();
10522       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10523     }
10524     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10525   };
10526 
10527   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10528                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10529                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10530     // Fill up the arrays with all the captured variables.
10531     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10532 
10533     // Get mappable expression information.
10534     MappableExprsHandler MEHandler(D, CGF);
10535     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10536     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10537 
10538     auto RI = CS.getCapturedRecordDecl()->field_begin();
10539     auto *CV = CapturedVars.begin();
10540     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10541                                               CE = CS.capture_end();
10542          CI != CE; ++CI, ++RI, ++CV) {
10543       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10544       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10545 
10546       // VLA sizes are passed to the outlined region by copy and do not have map
10547       // information associated.
10548       if (CI->capturesVariableArrayType()) {
10549         CurInfo.Exprs.push_back(nullptr);
10550         CurInfo.BasePointers.push_back(*CV);
10551         CurInfo.Pointers.push_back(*CV);
10552         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10553             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10554         // Copy to the device as an argument. No need to retrieve it.
10555         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10556                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10557                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10558         CurInfo.Mappers.push_back(nullptr);
10559       } else {
10560         // If we have any information in the map clause, we use it, otherwise we
10561         // just do a default mapping.
10562         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10563         if (!CI->capturesThis())
10564           MappedVarSet.insert(CI->getCapturedVar());
10565         else
10566           MappedVarSet.insert(nullptr);
10567         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10568           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10569         // Generate correct mapping for variables captured by reference in
10570         // lambdas.
10571         if (CI->capturesVariable())
10572           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10573                                                   CurInfo, LambdaPointers);
10574       }
10575       // We expect to have at least an element of information for this capture.
10576       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10577              "Non-existing map pointer for capture!");
10578       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10579              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10580              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10581              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10582              "Inconsistent map information sizes!");
10583 
10584       // If there is an entry in PartialStruct it means we have a struct with
10585       // individual members mapped. Emit an extra combined entry.
10586       if (PartialStruct.Base.isValid()) {
10587         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10588         MEHandler.emitCombinedEntry(
10589             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10590             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10591       }
10592 
10593       // We need to append the results of this capture to what we already have.
10594       CombinedInfo.append(CurInfo);
10595     }
10596     // Adjust MEMBER_OF flags for the lambdas captures.
10597     MEHandler.adjustMemberOfForLambdaCaptures(
10598         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10599         CombinedInfo.Types);
10600     // Map any list items in a map clause that were not captures because they
10601     // weren't referenced within the construct.
10602     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10603 
10604     TargetDataInfo Info;
10605     // Fill up the arrays and create the arguments.
10606     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10607     emitOffloadingArraysArgument(
10608         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10609         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10610         {/*ForEndTask=*/false});
10611 
10612     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10613     InputInfo.BasePointersArray =
10614         Address(Info.BasePointersArray, CGM.getPointerAlign());
10615     InputInfo.PointersArray =
10616         Address(Info.PointersArray, CGM.getPointerAlign());
10617     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10618     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10619     MapTypesArray = Info.MapTypesArray;
10620     MapNamesArray = Info.MapNamesArray;
10621     if (RequiresOuterTask)
10622       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10623     else
10624       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10625   };
10626 
10627   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10628                              CodeGenFunction &CGF, PrePostActionTy &) {
10629     if (RequiresOuterTask) {
10630       CodeGenFunction::OMPTargetDataInfo InputInfo;
10631       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10632     } else {
10633       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10634     }
10635   };
10636 
10637   // If we have a target function ID it means that we need to support
10638   // offloading, otherwise, just execute on the host. We need to execute on host
10639   // regardless of the conditional in the if clause if, e.g., the user do not
10640   // specify target triples.
10641   if (OutlinedFnID) {
10642     if (IfCond) {
10643       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10644     } else {
10645       RegionCodeGenTy ThenRCG(TargetThenGen);
10646       ThenRCG(CGF);
10647     }
10648   } else {
10649     RegionCodeGenTy ElseRCG(TargetElseGen);
10650     ElseRCG(CGF);
10651   }
10652 }
10653 
/// Recursively scan \p S for OpenMP target-executable directives and emit a
/// device function for each region found. \p ParentName is the mangled name
/// of the enclosing function/ctor/dtor and is used to produce the unique
/// kernel name for each target region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Compute the unique (device, file, line) triple identifying this target
    // region entry point.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for each supported
    // target-based combined directive.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target-execution directives and
    // cannot reach here (guarded by RequiresDeviceCodegen above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For non-target executable directives, scan only the user code (raw
  // statement), not the captured-statement wrappers.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10804 
10805 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10806   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10807       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10808   if (!DevTy)
10809     return false;
10810   // Do not emit device_type(nohost) functions for the host.
10811   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10812     return true;
10813   // Do not emit device_type(host) functions for the device.
10814   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10815     return true;
10816   return false;
10817 }
10818 
10819 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10820   // If emitting code for the host, we do not process FD here. Instead we do
10821   // the normal code generation.
10822   if (!CGM.getLangOpts().OpenMPIsDevice) {
10823     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10824       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10825                                   CGM.getLangOpts().OpenMPIsDevice))
10826         return true;
10827     return false;
10828   }
10829 
10830   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10831   // Try to detect target regions in the function.
10832   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10833     StringRef Name = CGM.getMangledName(GD);
10834     scanForTargetRegionsFunctions(FD->getBody(), Name);
10835     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10836                                 CGM.getLangOpts().OpenMPIsDevice))
10837       return true;
10838   }
10839 
10840   // Do not to emit function if it is not marked as declare target.
10841   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10842          AlreadyEmittedTargetDecls.count(VD) == 0;
10843 }
10844 
10845 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10846   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10847                               CGM.getLangOpts().OpenMPIsDevice))
10848     return true;
10849 
10850   if (!CGM.getLangOpts().OpenMPIsDevice)
10851     return false;
10852 
10853   // Check if there are Ctors/Dtors in this declaration and look for target
10854   // regions in it. We use the complete variant to produce the kernel name
10855   // mangling.
10856   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10857   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10858     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10859       StringRef ParentName =
10860           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10861       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10862     }
10863     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10864       StringRef ParentName =
10865           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10866       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10867     }
10868   }
10869 
10870   // Do not to emit variable if it is not marked as declare target.
10871   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10872       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10873           cast<VarDecl>(GD.getDecl()));
10874   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10875       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10876        HasRequiresUnifiedSharedMemory)) {
10877     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10878     return true;
10879   }
10880   return false;
10881 }
10882 
/// Registers \p VD (with address \p Addr) in the offload entries table so the
/// runtime can associate host and device copies of declare-target variables.
/// No-op when no target triples are specified and we are not compiling for a
/// device.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  // 'declare target to' without unified shared memory: register the variable
  // itself with its real size and linkage.
  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: size zero marks an external entry.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant "<name>_ref" pointing at the variable and
        // mark it compiler-used so it survives optimization.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'declare target link', or 'to' with unified shared memory: register a
    // pointer-sized entry for the link/ref pointer instead of the variable.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10964 
10965 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10966   if (isa<FunctionDecl>(GD.getDecl()) ||
10967       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10968     return emitTargetFunctions(GD);
10969 
10970   return emitTargetGlobalVariable(GD);
10971 }
10972 
10973 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10974   for (const VarDecl *VD : DeferredGlobalVariables) {
10975     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10976         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10977     if (!Res)
10978       continue;
10979     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10980         !HasRequiresUnifiedSharedMemory) {
10981       CGM.EmitGlobal(VD);
10982     } else {
10983       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10984               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10985                HasRequiresUnifiedSharedMemory)) &&
10986              "Expected link clause or to clause with unified memory.");
10987       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10988     }
10989   }
10990 }
10991 
10992 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10993     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10994   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10995          " Expected target-based directive.");
10996 }
10997 
10998 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10999   for (const OMPClause *Clause : D->clauselists()) {
11000     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11001       HasRequiresUnifiedSharedMemory = true;
11002     } else if (const auto *AC =
11003                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11004       switch (AC->getAtomicDefaultMemOrderKind()) {
11005       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11006         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11007         break;
11008       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11009         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11010         break;
11011       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11012         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11013         break;
11014       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11015         break;
11016       }
11017     }
11018   }
11019 }
11020 
/// Returns the atomic ordering recorded from a 'requires
/// atomic_default_mem_order' clause (see processRequiresDirective), used for
/// 'atomic' constructs that carry no explicit memory-order clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11024 
11025 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11026                                                        LangAS &AS) {
11027   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11028     return false;
11029   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11030   switch(A->getAllocatorType()) {
11031   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11032   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11033   // Not supported, fallback to the default mem space.
11034   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11035   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11036   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11037   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11038   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11039   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11040   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11041     AS = LangAS::Default;
11042     return true;
11043   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11044     llvm_unreachable("Expected predefined allocator for the variables with the "
11045                      "static storage.");
11046   }
11047   return false;
11048 }
11049 
/// Returns whether a '#pragma omp requires unified_shared_memory' directive
/// was seen in this compilation (recorded by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11053 
11054 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11055     CodeGenModule &CGM)
11056     : CGM(CGM) {
11057   if (CGM.getLangOpts().OpenMPIsDevice) {
11058     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11059     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11060   }
11061 }
11062 
11063 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11064   if (CGM.getLangOpts().OpenMPIsDevice)
11065     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11066 }
11067 
11068 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11069   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11070     return true;
11071 
11072   const auto *D = cast<FunctionDecl>(GD.getDecl());
11073   // Do not to emit function if it is marked as declare target as it was already
11074   // emitted.
11075   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11076     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11077       if (auto *F = dyn_cast_or_null<llvm::Function>(
11078               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11079         return !F->isDeclaration();
11080       return false;
11081     }
11082     return true;
11083   }
11084 
11085   return !AlreadyEmittedTargetDecls.insert(D).second;
11086 }
11087 
/// Creates the global constructor-style function that registers the
/// 'requires' flags with the offload runtime (__tgt_register_requires).
/// Returns nullptr when no registration is needed: no target triples,
/// simd-only mode, device-side compilation, or no target regions/entries in
/// this translation unit.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Build a void() global-init function named "omp_offloading.requires_reg"
    // whose body is a single call into the offload runtime.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11129 
11130 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11131                                     const OMPExecutableDirective &D,
11132                                     SourceLocation Loc,
11133                                     llvm::Function *OutlinedFn,
11134                                     ArrayRef<llvm::Value *> CapturedVars) {
11135   if (!CGF.HaveInsertPoint())
11136     return;
11137 
11138   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11139   CodeGenFunction::RunCleanupsScope Scope(CGF);
11140 
11141   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11142   llvm::Value *Args[] = {
11143       RTLoc,
11144       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11145       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11146   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11147   RealArgs.append(std::begin(Args), std::end(Args));
11148   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11149 
11150   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11151       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11152   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11153 }
11154 
11155 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11156                                          const Expr *NumTeams,
11157                                          const Expr *ThreadLimit,
11158                                          SourceLocation Loc) {
11159   if (!CGF.HaveInsertPoint())
11160     return;
11161 
11162   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11163 
11164   llvm::Value *NumTeamsVal =
11165       NumTeams
11166           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11167                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11168           : CGF.Builder.getInt32(0);
11169 
11170   llvm::Value *ThreadLimitVal =
11171       ThreadLimit
11172           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11173                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11174           : CGF.Builder.getInt32(0);
11175 
11176   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11177   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11178                                      ThreadLimitVal};
11179   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11180                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11181                       PushNumTeamsArgs);
11182 }
11183 
11184 void CGOpenMPRuntime::emitTargetDataCalls(
11185     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11186     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11187   if (!CGF.HaveInsertPoint())
11188     return;
11189 
11190   // Action used to replace the default codegen action and turn privatization
11191   // off.
11192   PrePostActionTy NoPrivAction;
11193 
11194   // Generate the code for the opening of the data environment. Capture all the
11195   // arguments of the runtime call by reference because they are used in the
11196   // closing of the region.
11197   auto &&BeginThenGen = [this, &D, Device, &Info,
11198                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11199     // Fill up the arrays with all the mapped variables.
11200     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11201 
11202     // Get map clause information.
11203     MappableExprsHandler MEHandler(D, CGF);
11204     MEHandler.generateAllInfo(CombinedInfo);
11205 
11206     // Fill up the arrays and create the arguments.
11207     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11208                          /*IsNonContiguous=*/true);
11209 
11210     llvm::Value *BasePointersArrayArg = nullptr;
11211     llvm::Value *PointersArrayArg = nullptr;
11212     llvm::Value *SizesArrayArg = nullptr;
11213     llvm::Value *MapTypesArrayArg = nullptr;
11214     llvm::Value *MapNamesArrayArg = nullptr;
11215     llvm::Value *MappersArrayArg = nullptr;
11216     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11217                                  SizesArrayArg, MapTypesArrayArg,
11218                                  MapNamesArrayArg, MappersArrayArg, Info);
11219 
11220     // Emit device ID if any.
11221     llvm::Value *DeviceID = nullptr;
11222     if (Device) {
11223       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11224                                            CGF.Int64Ty, /*isSigned=*/true);
11225     } else {
11226       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11227     }
11228 
11229     // Emit the number of elements in the offloading arrays.
11230     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11231     //
11232     // Source location for the ident struct
11233     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11234 
11235     llvm::Value *OffloadingArgs[] = {RTLoc,
11236                                      DeviceID,
11237                                      PointerNum,
11238                                      BasePointersArrayArg,
11239                                      PointersArrayArg,
11240                                      SizesArrayArg,
11241                                      MapTypesArrayArg,
11242                                      MapNamesArrayArg,
11243                                      MappersArrayArg};
11244     CGF.EmitRuntimeCall(
11245         OMPBuilder.getOrCreateRuntimeFunction(
11246             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11247         OffloadingArgs);
11248 
11249     // If device pointer privatization is required, emit the body of the region
11250     // here. It will have to be duplicated: with and without privatization.
11251     if (!Info.CaptureDeviceAddrMap.empty())
11252       CodeGen(CGF);
11253   };
11254 
11255   // Generate code for the closing of the data region.
11256   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11257                                                 PrePostActionTy &) {
11258     assert(Info.isValid() && "Invalid data environment closing arguments.");
11259 
11260     llvm::Value *BasePointersArrayArg = nullptr;
11261     llvm::Value *PointersArrayArg = nullptr;
11262     llvm::Value *SizesArrayArg = nullptr;
11263     llvm::Value *MapTypesArrayArg = nullptr;
11264     llvm::Value *MapNamesArrayArg = nullptr;
11265     llvm::Value *MappersArrayArg = nullptr;
11266     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11267                                  SizesArrayArg, MapTypesArrayArg,
11268                                  MapNamesArrayArg, MappersArrayArg, Info,
11269                                  {/*ForEndCall=*/true});
11270 
11271     // Emit device ID if any.
11272     llvm::Value *DeviceID = nullptr;
11273     if (Device) {
11274       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11275                                            CGF.Int64Ty, /*isSigned=*/true);
11276     } else {
11277       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11278     }
11279 
11280     // Emit the number of elements in the offloading arrays.
11281     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11282 
11283     // Source location for the ident struct
11284     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11285 
11286     llvm::Value *OffloadingArgs[] = {RTLoc,
11287                                      DeviceID,
11288                                      PointerNum,
11289                                      BasePointersArrayArg,
11290                                      PointersArrayArg,
11291                                      SizesArrayArg,
11292                                      MapTypesArrayArg,
11293                                      MapNamesArrayArg,
11294                                      MappersArrayArg};
11295     CGF.EmitRuntimeCall(
11296         OMPBuilder.getOrCreateRuntimeFunction(
11297             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11298         OffloadingArgs);
11299   };
11300 
11301   // If we need device pointer privatization, we need to emit the body of the
11302   // region with no privatization in the 'else' branch of the conditional.
11303   // Otherwise, we don't have to do anything.
11304   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11305                                                          PrePostActionTy &) {
11306     if (!Info.CaptureDeviceAddrMap.empty()) {
11307       CodeGen.setAction(NoPrivAction);
11308       CodeGen(CGF);
11309     }
11310   };
11311 
11312   // We don't have to do anything to close the region if the if clause evaluates
11313   // to false.
11314   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11315 
11316   if (IfCond) {
11317     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11318   } else {
11319     RegionCodeGenTy RCG(BeginThenGen);
11320     RCG(CGF);
11321   }
11322 
11323   // If we don't require privatization of device pointers, we emit the body in
11324   // between the runtime calls. This avoids duplicating the body code.
11325   if (Info.CaptureDeviceAddrMap.empty()) {
11326     CodeGen.setAction(NoPrivAction);
11327     CodeGen(CGF);
11328   }
11329 
11330   if (IfCond) {
11331     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11332   } else {
11333     RegionCodeGenTy RCG(EndThenGen);
11334     RCG(CGF);
11335   }
11336 }
11337 
/// Emit the offloading runtime call for the standalone target data
/// directives: 'target enter data', 'target exit data' and 'target update'.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // No insertion point means the current block is unreachable; nothing to do.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and the two arrays below are captured by reference in ThenGen
  // and are populated later by TargetThenGen, before ThenGen is executed.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any. With no device clause the runtime picks the
    // default device (OMP_DEVICEID_UNDEF).
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument list shared by all __tgt_target_data_*_mapper entry points.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive. A 'nowait' clause selects the asynchronous variant.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assert at the top of
    // this function; they are enumerated so new kinds trigger a -Wswitch
    // warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Materialize the offloading arrays and then run ThenGen (directly, or
  // wrapped in an outer task if 'depend'/'nowait' requires one).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the results for ThenGen (which captured these by reference).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an if clause the runtime call is emitted only on the 'then' path;
  // the 'else' path is a no-op for these standalone directives.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11517 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; anything not otherwise marked is
    /// treated as a vector parameter.
    ParamKindTy Kind = Vector;
    /// Linear step for 'linear' parameters, or (for LinearWithVarStride) the
    /// position of the parameter holding the variable stride.
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause; zero when not specified.
    llvm::APSInt Alignment;
  };
} // namespace
11528 
11529 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11530                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11531   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11532   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11533   // of that clause. The VLEN value must be power of 2.
11534   // In other case the notion of the function`s "characteristic data type" (CDT)
11535   // is used to compute the vector length.
11536   // CDT is defined in the following order:
11537   //   a) For non-void function, the CDT is the return type.
11538   //   b) If the function has any non-uniform, non-linear parameters, then the
11539   //   CDT is the type of the first such parameter.
11540   //   c) If the CDT determined by a) or b) above is struct, union, or class
11541   //   type which is pass-by-value (except for the type that maps to the
11542   //   built-in complex data type), the characteristic data type is int.
11543   //   d) If none of the above three cases is applicable, the CDT is int.
11544   // The VLEN is then determined based on the CDT and the size of vector
11545   // register of that ISA for which current vector version is generated. The
11546   // VLEN is computed using the formula below:
11547   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11548   // where vector register size specified in section 3.2.1 Registers and the
11549   // Stack Frame of original AMD64 ABI document.
11550   QualType RetType = FD->getReturnType();
11551   if (RetType.isNull())
11552     return 0;
11553   ASTContext &C = FD->getASTContext();
11554   QualType CDT;
11555   if (!RetType.isNull() && !RetType->isVoidType()) {
11556     CDT = RetType;
11557   } else {
11558     unsigned Offset = 0;
11559     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11560       if (ParamAttrs[Offset].Kind == Vector)
11561         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11562       ++Offset;
11563     }
11564     if (CDT.isNull()) {
11565       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11566         if (ParamAttrs[I + Offset].Kind == Vector) {
11567           CDT = FD->getParamDecl(I)->getType();
11568           break;
11569         }
11570       }
11571     }
11572   }
11573   if (CDT.isNull())
11574     CDT = C.IntTy;
11575   CDT = CDT->getCanonicalTypeUnqualified();
11576   if (CDT->isRecordType() || CDT->isUnionType())
11577     CDT = C.IntTy;
11578   return C.getTypeSize(CDT);
11579 }
11580 
11581 static void
11582 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11583                            const llvm::APSInt &VLENVal,
11584                            ArrayRef<ParamAttrTy> ParamAttrs,
11585                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11586   struct ISADataTy {
11587     char ISA;
11588     unsigned VecRegSize;
11589   };
11590   ISADataTy ISAData[] = {
11591       {
11592           'b', 128
11593       }, // SSE
11594       {
11595           'c', 256
11596       }, // AVX
11597       {
11598           'd', 256
11599       }, // AVX2
11600       {
11601           'e', 512
11602       }, // AVX512
11603   };
11604   llvm::SmallVector<char, 2> Masked;
11605   switch (State) {
11606   case OMPDeclareSimdDeclAttr::BS_Undefined:
11607     Masked.push_back('N');
11608     Masked.push_back('M');
11609     break;
11610   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11611     Masked.push_back('N');
11612     break;
11613   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11614     Masked.push_back('M');
11615     break;
11616   }
11617   for (char Mask : Masked) {
11618     for (const ISADataTy &Data : ISAData) {
11619       SmallString<256> Buffer;
11620       llvm::raw_svector_ostream Out(Buffer);
11621       Out << "_ZGV" << Data.ISA << Mask;
11622       if (!VLENVal) {
11623         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11624         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11625         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11626       } else {
11627         Out << VLENVal;
11628       }
11629       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11630         switch (ParamAttr.Kind){
11631         case LinearWithVarStride:
11632           Out << 's' << ParamAttr.StrideOrArg;
11633           break;
11634         case Linear:
11635           Out << 'l';
11636           if (ParamAttr.StrideOrArg != 1)
11637             Out << ParamAttr.StrideOrArg;
11638           break;
11639         case Uniform:
11640           Out << 'u';
11641           break;
11642         case Vector:
11643           Out << 'v';
11644           break;
11645         }
11646         if (!!ParamAttr.Alignment)
11647           Out << 'a' << ParamAttr.Alignment;
11648       }
11649       Out << '_' << Fn->getName();
11650       Fn->addFnAttr(Out.str());
11651     }
11652   }
11653 }
11654 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11660 
11661 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11662 ///
11663 /// TODO: Need to implement the behavior for reference marked with a
11664 /// var or no linear modifiers (1.b in the section). For this, we
11665 /// need to extend ParamKindTy to support the linear modifiers.
11666 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11667   QT = QT.getCanonicalType();
11668 
11669   if (QT->isVoidType())
11670     return false;
11671 
11672   if (Kind == ParamKindTy::Uniform)
11673     return false;
11674 
11675   if (Kind == ParamKindTy::Linear)
11676     return false;
11677 
11678   // TODO: Handle linear references with modifiers
11679 
11680   if (Kind == ParamKindTy::LinearWithVarStride)
11681     return false;
11682 
11683   return true;
11684 }
11685 
11686 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11687 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11688   QT = QT.getCanonicalType();
11689   unsigned Size = C.getTypeSize(QT);
11690 
11691   // Only scalars and complex within 16 bytes wide set PVB to true.
11692   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11693     return false;
11694 
11695   if (QT->isFloatingType())
11696     return true;
11697 
11698   if (QT->isIntegerType())
11699     return true;
11700 
11701   if (QT->isPointerType())
11702     return true;
11703 
11704   // TODO: Add support for complex types (section 3.1.2, item 2).
11705 
11706   return false;
11707 }
11708 
11709 /// Computes the lane size (LS) of a return type or of an input parameter,
11710 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11711 /// TODO: Add support for references, section 3.2.1, item 1.
11712 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11713   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11714     QualType PTy = QT.getCanonicalType()->getPointeeType();
11715     if (getAArch64PBV(PTy, C))
11716       return C.getTypeSize(PTy);
11717   }
11718   if (getAArch64PBV(QT, C))
11719     return C.getTypeSize(QT);
11720 
11721   return C.getTypeSize(C.getUIntPtrType());
11722 }
11723 
11724 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11725 // signature of the scalar function, as defined in 3.2.2 of the
11726 // AAVFABI.
11727 static std::tuple<unsigned, unsigned, bool>
11728 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11729   QualType RetType = FD->getReturnType().getCanonicalType();
11730 
11731   ASTContext &C = FD->getASTContext();
11732 
11733   bool OutputBecomesInput = false;
11734 
11735   llvm::SmallVector<unsigned, 8> Sizes;
11736   if (!RetType->isVoidType()) {
11737     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11738     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11739       OutputBecomesInput = true;
11740   }
11741   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11742     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11743     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11744   }
11745 
11746   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11747   // The LS of a function parameter / return value can only be a power
11748   // of 2, starting from 8 bits, up to 128.
11749   assert(llvm::all_of(Sizes,
11750                       [](unsigned Size) {
11751                         return Size == 8 || Size == 16 || Size == 32 ||
11752                                Size == 64 || Size == 128;
11753                       }) &&
11754          "Invalid size");
11755 
11756   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11757                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11758                          OutputBecomesInput);
11759 }
11760 
11761 /// Mangle the parameter part of the vector function name according to
11762 /// their OpenMP classification. The mangling function is defined in
11763 /// section 3.5 of the AAVFABI.
11764 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11765   SmallString<256> Buffer;
11766   llvm::raw_svector_ostream Out(Buffer);
11767   for (const auto &ParamAttr : ParamAttrs) {
11768     switch (ParamAttr.Kind) {
11769     case LinearWithVarStride:
11770       Out << "ls" << ParamAttr.StrideOrArg;
11771       break;
11772     case Linear:
11773       Out << 'l';
11774       // Don't print the step value if it is not present or if it is
11775       // equal to 1.
11776       if (ParamAttr.StrideOrArg != 1)
11777         Out << ParamAttr.StrideOrArg;
11778       break;
11779     case Uniform:
11780       Out << 'u';
11781       break;
11782     case Vector:
11783       Out << 'v';
11784       break;
11785     }
11786 
11787     if (!!ParamAttr.Alignment)
11788       Out << 'a' << ParamAttr.Alignment;
11789   }
11790 
11791   return std::string(Out.str());
11792 }
11793 
11794 // Function used to add the attribute. The parameter `VLEN` is
11795 // templated to allow the use of "x" when targeting scalable functions
11796 // for SVE.
11797 template <typename T>
11798 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11799                                  char ISA, StringRef ParSeq,
11800                                  StringRef MangledName, bool OutputBecomesInput,
11801                                  llvm::Function *Fn) {
11802   SmallString<256> Buffer;
11803   llvm::raw_svector_ostream Out(Buffer);
11804   Out << Prefix << ISA << LMask << VLEN;
11805   if (OutputBecomesInput)
11806     Out << "v";
11807   Out << ParSeq << "_" << MangledName;
11808   Fn->addFnAttr(Out.str());
11809 }
11810 
11811 // Helper function to generate the Advanced SIMD names depending on
11812 // the value of the NDS when simdlen is not present.
11813 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11814                                       StringRef Prefix, char ISA,
11815                                       StringRef ParSeq, StringRef MangledName,
11816                                       bool OutputBecomesInput,
11817                                       llvm::Function *Fn) {
11818   switch (NDS) {
11819   case 8:
11820     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11821                          OutputBecomesInput, Fn);
11822     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11823                          OutputBecomesInput, Fn);
11824     break;
11825   case 16:
11826     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11827                          OutputBecomesInput, Fn);
11828     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11829                          OutputBecomesInput, Fn);
11830     break;
11831   case 32:
11832     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11833                          OutputBecomesInput, Fn);
11834     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11835                          OutputBecomesInput, Fn);
11836     break;
11837   case 64:
11838   case 128:
11839     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11840                          OutputBecomesInput, Fn);
11841     break;
11842   default:
11843     llvm_unreachable("Scalar type is too wide.");
11844   }
11845 }
11846 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \p ISA is 'n' for Advanced SIMD or 's' for SVE; \p UserVLEN is the value
/// of the 'simdlen' clause (0 when absent).
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vectors use "x" as the VLEN.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11955 
11956 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11957                                               llvm::Function *Fn) {
11958   ASTContext &C = CGM.getContext();
11959   FD = FD->getMostRecentDecl();
11960   // Map params to their positions in function decl.
11961   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11962   if (isa<CXXMethodDecl>(FD))
11963     ParamPositions.try_emplace(FD, 0);
11964   unsigned ParamPos = ParamPositions.size();
11965   for (const ParmVarDecl *P : FD->parameters()) {
11966     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11967     ++ParamPos;
11968   }
11969   while (FD) {
11970     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11971       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11972       // Mark uniform parameters.
11973       for (const Expr *E : Attr->uniforms()) {
11974         E = E->IgnoreParenImpCasts();
11975         unsigned Pos;
11976         if (isa<CXXThisExpr>(E)) {
11977           Pos = ParamPositions[FD];
11978         } else {
11979           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11980                                 ->getCanonicalDecl();
11981           Pos = ParamPositions[PVD];
11982         }
11983         ParamAttrs[Pos].Kind = Uniform;
11984       }
11985       // Get alignment info.
11986       auto NI = Attr->alignments_begin();
11987       for (const Expr *E : Attr->aligneds()) {
11988         E = E->IgnoreParenImpCasts();
11989         unsigned Pos;
11990         QualType ParmTy;
11991         if (isa<CXXThisExpr>(E)) {
11992           Pos = ParamPositions[FD];
11993           ParmTy = E->getType();
11994         } else {
11995           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11996                                 ->getCanonicalDecl();
11997           Pos = ParamPositions[PVD];
11998           ParmTy = PVD->getType();
11999         }
12000         ParamAttrs[Pos].Alignment =
12001             (*NI)
12002                 ? (*NI)->EvaluateKnownConstInt(C)
12003                 : llvm::APSInt::getUnsigned(
12004                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12005                           .getQuantity());
12006         ++NI;
12007       }
12008       // Mark linear parameters.
12009       auto SI = Attr->steps_begin();
12010       auto MI = Attr->modifiers_begin();
12011       for (const Expr *E : Attr->linears()) {
12012         E = E->IgnoreParenImpCasts();
12013         unsigned Pos;
12014         // Rescaling factor needed to compute the linear parameter
12015         // value in the mangled name.
12016         unsigned PtrRescalingFactor = 1;
12017         if (isa<CXXThisExpr>(E)) {
12018           Pos = ParamPositions[FD];
12019         } else {
12020           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12021                                 ->getCanonicalDecl();
12022           Pos = ParamPositions[PVD];
12023           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12024             PtrRescalingFactor = CGM.getContext()
12025                                      .getTypeSizeInChars(P->getPointeeType())
12026                                      .getQuantity();
12027         }
12028         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12029         ParamAttr.Kind = Linear;
12030         // Assuming a stride of 1, for `linear` without modifiers.
12031         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12032         if (*SI) {
12033           Expr::EvalResult Result;
12034           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12035             if (const auto *DRE =
12036                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12037               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12038                 ParamAttr.Kind = LinearWithVarStride;
12039                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12040                     ParamPositions[StridePVD->getCanonicalDecl()]);
12041               }
12042             }
12043           } else {
12044             ParamAttr.StrideOrArg = Result.Val.getInt();
12045           }
12046         }
12047         // If we are using a linear clause on a pointer, we need to
12048         // rescale the value of linear_step with the byte size of the
12049         // pointee type.
12050         if (Linear == ParamAttr.Kind)
12051           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12052         ++SI;
12053         ++MI;
12054       }
12055       llvm::APSInt VLENVal;
12056       SourceLocation ExprLoc;
12057       const Expr *VLENExpr = Attr->getSimdlen();
12058       if (VLENExpr) {
12059         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12060         ExprLoc = VLENExpr->getExprLoc();
12061       }
12062       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12063       if (CGM.getTriple().isX86()) {
12064         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12065       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12066         unsigned VLEN = VLENVal.getExtValue();
12067         StringRef MangledName = Fn->getName();
12068         if (CGM.getTarget().hasFeature("sve"))
12069           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12070                                          MangledName, 's', 128, Fn, ExprLoc);
12071         if (CGM.getTarget().hasFeature("neon"))
12072           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12073                                          MangledName, 'n', 128, Fn, ExprLoc);
12074       }
12075     }
12076     FD = FD->getPreviousDecl();
12077   }
12078 }
12079 
12080 namespace {
12081 /// Cleanup action for doacross support.
12082 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12083 public:
12084   static const int DoacrossFinArgs = 2;
12085 
12086 private:
12087   llvm::FunctionCallee RTLFn;
12088   llvm::Value *Args[DoacrossFinArgs];
12089 
12090 public:
12091   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12092                     ArrayRef<llvm::Value *> CallArgs)
12093       : RTLFn(RTLFn) {
12094     assert(CallArgs.size() == DoacrossFinArgs);
12095     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12096   }
12097   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12098     if (!CGF.HaveInsertPoint())
12099       return;
12100     CGF.EmitRuntimeCall(RTLFn, Args);
12101   }
12102 };
12103 } // namespace
12104 
/// Emit the doacross-loop initialization for directive \p D: builds the
/// per-dimension kmp_dim array from \p NumIterations, calls
/// __kmpc_doacross_init, and pushes a cleanup that calls
/// __kmpc_doacross_fini at region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build (and cache in KmpDimTy) the kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per collapsed loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array; 'lo' stays 0 for every dimension.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Widen/convert the iteration count to kmp_int64.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) on both normal and EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12175 
12176 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12177                                           const OMPDependClause *C) {
12178   QualType Int64Ty =
12179       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12180   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12181   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12182       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12183   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12184   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12185     const Expr *CounterVal = C->getLoopData(I);
12186     assert(CounterVal);
12187     llvm::Value *CntVal = CGF.EmitScalarConversion(
12188         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12189         CounterVal->getExprLoc());
12190     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12191                           /*Volatile=*/false, Int64Ty);
12192   }
12193   llvm::Value *Args[] = {
12194       emitUpdateLocation(CGF, C->getBeginLoc()),
12195       getThreadID(CGF, C->getBeginLoc()),
12196       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12197   llvm::FunctionCallee RTLFn;
12198   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12199     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12200                                                   OMPRTL___kmpc_doacross_post);
12201   } else {
12202     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12203     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12204                                                   OMPRTL___kmpc_doacross_wait);
12205   }
12206   CGF.EmitRuntimeCall(RTLFn, Args);
12207 }
12208 
12209 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12210                                llvm::FunctionCallee Callee,
12211                                ArrayRef<llvm::Value *> Args) const {
12212   assert(Loc.isValid() && "Outlined function call location must be valid.");
12213   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12214 
12215   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12216     if (Fn->doesNotThrow()) {
12217       CGF.EmitNounwindRuntimeCall(Fn, Args);
12218       return;
12219     }
12220   }
12221   CGF.EmitRuntimeCall(Callee, Args);
12222 }
12223 
/// Emit a call to an outlined OpenMP function; virtual hook that device
/// runtimes can override, the host default just forwards to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12229 
12230 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12231   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12232     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12233       HasEmittedDeclareTargetRegion = true;
12234 }
12235 
/// Map a target-specific parameter back to its address; the host default
/// simply returns the native parameter's local address (device runtimes
/// override this when native and target parameters differ).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12241 
/// Return the address to use for local variable \p VD, handling two special
/// cases: variables relocated into an untied-task frame, and variables with
/// an 'omp allocate' attribute (allocated via __kmpc_alloc and released via a
/// __kmpc_free cleanup). Returns Address::invalid() when no special handling
/// applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, look up the variable's
  // (possibly relocated) addresses in the task's local-vars map.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size known only at runtime; round it up to the alignment.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *__kmpc_alloc(gtid, size, allocator)
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // Untied task: publish the allocated pointer through the task frame slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      // Location is stored as a raw encoding because cleanups outlive the
      // statement being emitted.
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Free the real storage (task-frame copy when present) at scope exit.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12345 
12346 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12347                                              const VarDecl *VD) const {
12348   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12349   if (It == FunctionToUntiedTaskStackMap.end())
12350     return false;
12351   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12352 }
12353 
/// RAII push: if \p S carries 'nontemporal' clauses, collect every declared
/// variable (or class member) they reference into a fresh set on the
/// nontemporal-decls stack; the destructor pops it.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Only member references of the current class are expected here.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
12379 
12380 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12381   if (!NeedToPush)
12382     return;
12383   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12384 }
12385 
12386 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12387     CodeGenFunction &CGF,
12388     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12389                           std::pair<Address, Address>> &LocalVars)
12390     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12391   if (!NeedToPush)
12392     return;
12393   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12394       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12395   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12396 }
12397 
12398 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12399   if (!NeedToPush)
12400     return;
12401   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12402 }
12403 
12404 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12405   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12406 
12407   return llvm::any_of(
12408       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12409       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12410 }
12411 
12412 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12413     const OMPExecutableDirective &S,
12414     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12415     const {
12416   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12417   // Vars in target/task regions must be excluded completely.
12418   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12419       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12420     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12421     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12422     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12423     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12424       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12425         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12426     }
12427   }
12428   // Exclude vars in private clauses.
12429   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12430     for (const Expr *Ref : C->varlists()) {
12431       if (!Ref->getType()->isScalarType())
12432         continue;
12433       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12434       if (!DRE)
12435         continue;
12436       NeedToCheckForLPCs.insert(DRE->getDecl());
12437     }
12438   }
12439   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12440     for (const Expr *Ref : C->varlists()) {
12441       if (!Ref->getType()->isScalarType())
12442         continue;
12443       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12444       if (!DRE)
12445         continue;
12446       NeedToCheckForLPCs.insert(DRE->getDecl());
12447     }
12448   }
12449   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12450     for (const Expr *Ref : C->varlists()) {
12451       if (!Ref->getType()->isScalarType())
12452         continue;
12453       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12454       if (!DRE)
12455         continue;
12456       NeedToCheckForLPCs.insert(DRE->getDecl());
12457     }
12458   }
12459   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12460     for (const Expr *Ref : C->varlists()) {
12461       if (!Ref->getType()->isScalarType())
12462         continue;
12463       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12464       if (!DRE)
12465         continue;
12466       NeedToCheckForLPCs.insert(DRE->getDecl());
12467     }
12468   }
12469   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12470     for (const Expr *Ref : C->varlists()) {
12471       if (!Ref->getType()->isScalarType())
12472         continue;
12473       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12474       if (!DRE)
12475         continue;
12476       NeedToCheckForLPCs.insert(DRE->getDecl());
12477     }
12478   }
12479   for (const Decl *VD : NeedToCheckForLPCs) {
12480     for (const LastprivateConditionalData &Data :
12481          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12482       if (Data.DeclToUniqueName.count(VD) > 0) {
12483         if (!Data.Disabled)
12484           NeedToAddForLPCsAsDisabled.insert(VD);
12485         break;
12486       }
12487     }
12488   }
12489 }
12490 
/// RAII push: if OpenMP >= 5.0 and \p S has a 'lastprivate(conditional:)'
/// clause, push a tracking record mapping each listed variable to a unique
/// global name, together with the loop IV lvalue used to order updates.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only when some lastprivate clause uses the 'conditional' kind.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Each conditional lastprivate var gets a unique name for the global
    // "last value" storage used at region end.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12522 
/// RAII push (disable form): if directive \p S privatizes or captures
/// variables tracked as lastprivate conditionals by enclosing regions, push
/// a 'Disabled' record for them so inner analysis skips those vars.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for disabled entries; only membership in
    // DeclToUniqueName is checked.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12541 
/// Named factory for the disable form of the RAII, for readability at call
/// sites.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12547 
12548 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12549   if (CGM.getLangOpts().OpenMP < 50)
12550     return;
12551   if (Action == ActionToDo::DisableLastprivateConditional) {
12552     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12553            "Expected list of disabled private vars.");
12554     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12555   }
12556   if (Action == ActionToDo::PushAsLastprivateConditional) {
12557     assert(
12558         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12559         "Expected list of lastprivate conditional vars.");
12560     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12561   }
12562 }
12563 
/// Allocate (or reuse) the per-function { value, fired } pair for the
/// lastprivate conditional variable \p VD, reset its 'fired' flag to 0, and
/// return the address of the value field to use as the private copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build an implicit record
    //   struct { <VD type> value; char fired; }
    // and allocate a local temporary of that type.
    // (Record name spelled "lasprivate.conditional" upstream - kept as is,
    // it is the identifier emitted into the AST.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the cached record type, fields and base lvalue.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // fired = 0; the flag is set on assignment to the private copy.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12598 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
///
/// Walks an expression tree looking for a DeclRefExpr/MemberExpr that names a
/// variable tracked on the lastprivate-conditional stack; on success records
/// the expression, declaration, unique name, IV lvalue and owning function
/// for the caller to retrieve via getFoundData().
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;       // matched reference expression
  const Decl *FoundD = nullptr;       // canonical decl of the matched var
  StringRef UniqueDeclName;           // unique global name for the var
  LValue IVLVal;                      // loop IV of the tracking region
  llvm::Function *FoundFn = nullptr;  // function that pushed the record
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost-to-outermost; a 'Disabled' record shadows any outer
    // tracking of the same declaration.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members of the current class ('this->x') can be tracked.
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse into children, but only glvalue subexpressions can name the
    // variable being assigned.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12669 
/// Emits the conditional update for a lastprivate conditional variable:
/// inside a critical section named after the variable (or unguarded in
/// SIMD-only mode, where no parallel regions can exist), the last stored
/// loop-iteration value is compared with the current one and, when the
/// current iteration is not earlier, both the iteration counter and the
/// private value are copied into internal global variables created here on
/// demand. \p IVLVal is the loop iteration variable, \p UniqueDeclName the
/// mangled name used for the internal globals, \p LVal the private copy.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Use a signed or unsigned comparison matching the IV's type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12756 
/// If \p LHS references a variable registered as lastprivate conditional,
/// emits the required bookkeeping: when the reference is in the same function
/// that registered the variable, the internal global copy is conditionally
/// updated; otherwise (an inner outlined region) the 'Fired' flag of the
/// descriptor associated with the private copy is set atomically so the outer
/// region can perform the update later.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // Lastprivate conditional is an OpenMP 5.0 feature; bail out early if it
  // cannot apply or nothing is being tracked.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    // Reinterpret the private copy's address as the descriptor struct to
    // reach its 'Fired' field.
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic store: other threads of the inner region may set the flag too.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12799 
12800 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12801     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12802     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12803   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12804     return;
12805   auto Range = llvm::reverse(LastprivateConditionalStack);
12806   auto It = llvm::find_if(
12807       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12808   if (It == Range.end() || It->Fn != CGF.CurFn)
12809     return;
12810   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12811   assert(LPCI != LastprivateConditionalToTypes.end() &&
12812          "Lastprivates must be registered already.");
12813   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12814   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12815   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12816   for (const auto &Pair : It->DeclToUniqueName) {
12817     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12818     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12819       continue;
12820     auto I = LPCI->getSecond().find(Pair.first);
12821     assert(I != LPCI->getSecond().end() &&
12822            "Lastprivate must be rehistered already.");
12823     // bool Cmp = priv_a.Fired != 0;
12824     LValue BaseLVal = std::get<3>(I->getSecond());
12825     LValue FiredLVal =
12826         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12827     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12828     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12829     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12830     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12831     // if (Cmp) {
12832     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12833     CGF.EmitBlock(ThenBB);
12834     Address Addr = CGF.GetAddrOfLocalVar(VD);
12835     LValue LVal;
12836     if (VD->getType()->isReferenceType())
12837       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12838                                            AlignmentSource::Decl);
12839     else
12840       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12841                                 AlignmentSource::Decl);
12842     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12843                                      D.getBeginLoc());
12844     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12845     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12846     // }
12847   }
12848 }
12849 
/// At the end of the region, copies the final value of a lastprivate
/// conditional variable from the internal global (written by
/// emitLastprivateConditionalUpdate) back into the private copy \p PrivLVal.
/// If the internal global was never created, the variable was never updated
/// conditionally and nothing needs to be done.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // Lastprivate conditional is an OpenMP 5.0 feature.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12868 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime overrides for -fopenmp-simd only mode. In this mode
// only 'simd'-related constructs are allowed, so every entry point below
// that would require the full OpenMP runtime must never be reached; each is
// reduced to llvm_unreachable.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13043 
// In SIMD-only mode no parallel regions are emitted, so only 'simple'
// reductions (performed inline, without runtime calls) can occur; delegate
// to the base implementation, which handles exactly that case.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13052 
// SIMD-only mode: task reductions, cancellation and target offloading all
// require the full OpenMP runtime and must never be reached here.

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13121 
// No device code is produced in SIMD-only mode, so no global requires
// special target handling; returning false lets normal host emission
// proceed.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13125 
// SIMD-only mode: teams, target data and doacross constructs require the
// full OpenMP runtime and must never be reached here.

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13176