1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include "llvm/Support/Format.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <numeric>
43 
44 using namespace clang;
45 using namespace CodeGen;
46 using namespace llvm::omp;
47 
48 namespace {
49 /// Base class for handling code generation inside OpenMP regions.
50 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
51 public:
52   /// Kinds of OpenMP regions used in codegen.
53   enum CGOpenMPRegionKind {
54     /// Region with outlined function for standalone 'parallel'
55     /// directive.
56     ParallelOutlinedRegion,
57     /// Region with outlined function for standalone 'task' directive.
58     TaskOutlinedRegion,
59     /// Region for constructs that do not require function outlining,
60     /// like 'for', 'sections', 'atomic' etc. directives.
61     InlinedRegion,
62     /// Region with outlined function for standalone 'target' directive.
63     TargetRegion,
64   };
65 
66   CGOpenMPRegionInfo(const CapturedStmt &CS,
67                      const CGOpenMPRegionKind RegionKind,
68                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
69                      bool HasCancel)
70       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
71         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
72 
73   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
74                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
75                      bool HasCancel)
76       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
77         Kind(Kind), HasCancel(HasCancel) {}
78 
79   /// Get a variable or parameter for storing global thread id
80   /// inside OpenMP construct.
81   virtual const VarDecl *getThreadIDVariable() const = 0;
82 
83   /// Emit the captured statement body.
84   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
85 
86   /// Get an LValue for the current ThreadID variable.
87   /// \return LValue for thread id variable. This LValue always has type int32*.
88   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
89 
90   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
91 
92   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
93 
94   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
95 
96   bool hasCancel() const { return HasCancel; }
97 
98   static bool classof(const CGCapturedStmtInfo *Info) {
99     return Info->getKind() == CR_OpenMP;
100   }
101 
102   ~CGOpenMPRegionInfo() override = default;
103 
104 protected:
105   CGOpenMPRegionKind RegionKind;
106   RegionCodeGenTy CodeGen;
107   OpenMPDirectiveKind Kind;
108   bool HasCancel;
109 };
110 
111 /// API for captured statement code generation in OpenMP constructs.
112 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
113 public:
114   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
115                              const RegionCodeGenTy &CodeGen,
116                              OpenMPDirectiveKind Kind, bool HasCancel,
117                              StringRef HelperName)
118       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
119                            HasCancel),
120         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
121     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
122   }
123 
124   /// Get a variable or parameter for storing global thread id
125   /// inside OpenMP construct.
126   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
127 
128   /// Get the name of the capture helper.
129   StringRef getHelperName() const override { return HelperName; }
130 
131   static bool classof(const CGCapturedStmtInfo *Info) {
132     return CGOpenMPRegionInfo::classof(Info) &&
133            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
134                ParallelOutlinedRegion;
135   }
136 
137 private:
138   /// A variable or parameter storing global thread id for OpenMP
139   /// constructs.
140   const VarDecl *ThreadIDVar;
141   StringRef HelperName;
142 };
143 
144 /// API for captured statement code generation in OpenMP constructs.
145 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
146 public:
147   class UntiedTaskActionTy final : public PrePostActionTy {
148     bool Untied;
149     const VarDecl *PartIDVar;
150     const RegionCodeGenTy UntiedCodeGen;
151     llvm::SwitchInst *UntiedSwitch = nullptr;
152 
153   public:
154     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
155                        const RegionCodeGenTy &UntiedCodeGen)
156         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
157     void Enter(CodeGenFunction &CGF) override {
158       if (Untied) {
159         // Emit task switching point.
160         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
161             CGF.GetAddrOfLocalVar(PartIDVar),
162             PartIDVar->getType()->castAs<PointerType>());
163         llvm::Value *Res =
164             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
165         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
166         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
167         CGF.EmitBlock(DoneBB);
168         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
169         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
170         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
171                               CGF.Builder.GetInsertBlock());
172         emitUntiedSwitch(CGF);
173       }
174     }
175     void emitUntiedSwitch(CodeGenFunction &CGF) const {
176       if (Untied) {
177         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
178             CGF.GetAddrOfLocalVar(PartIDVar),
179             PartIDVar->getType()->castAs<PointerType>());
180         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
181                               PartIdLVal);
182         UntiedCodeGen(CGF);
183         CodeGenFunction::JumpDest CurPoint =
184             CGF.getJumpDestInCurrentScope(".untied.next.");
185         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
186         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
187         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
188                               CGF.Builder.GetInsertBlock());
189         CGF.EmitBranchThroughCleanup(CurPoint);
190         CGF.EmitBlock(CurPoint.getBlock());
191       }
192     }
193     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
194   };
195   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
196                                  const VarDecl *ThreadIDVar,
197                                  const RegionCodeGenTy &CodeGen,
198                                  OpenMPDirectiveKind Kind, bool HasCancel,
199                                  const UntiedTaskActionTy &Action)
200       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
201         ThreadIDVar(ThreadIDVar), Action(Action) {
202     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
203   }
204 
205   /// Get a variable or parameter for storing global thread id
206   /// inside OpenMP construct.
207   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
208 
209   /// Get an LValue for the current ThreadID variable.
210   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
211 
212   /// Get the name of the capture helper.
213   StringRef getHelperName() const override { return ".omp_outlined."; }
214 
215   void emitUntiedSwitch(CodeGenFunction &CGF) override {
216     Action.emitUntiedSwitch(CGF);
217   }
218 
219   static bool classof(const CGCapturedStmtInfo *Info) {
220     return CGOpenMPRegionInfo::classof(Info) &&
221            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
222                TaskOutlinedRegion;
223   }
224 
225 private:
226   /// A variable or parameter storing global thread id for OpenMP
227   /// constructs.
228   const VarDecl *ThreadIDVar;
229   /// Action for emitting code for untied tasks.
230   const UntiedTaskActionTy &Action;
231 };
232 
233 /// API for inlined captured statement code generation in OpenMP
234 /// constructs.
235 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
236 public:
237   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
238                             const RegionCodeGenTy &CodeGen,
239                             OpenMPDirectiveKind Kind, bool HasCancel)
240       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
241         OldCSI(OldCSI),
242         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
243 
244   // Retrieve the value of the context parameter.
245   llvm::Value *getContextValue() const override {
246     if (OuterRegionInfo)
247       return OuterRegionInfo->getContextValue();
248     llvm_unreachable("No context value for inlined OpenMP region");
249   }
250 
251   void setContextValue(llvm::Value *V) override {
252     if (OuterRegionInfo) {
253       OuterRegionInfo->setContextValue(V);
254       return;
255     }
256     llvm_unreachable("No context value for inlined OpenMP region");
257   }
258 
259   /// Lookup the captured field decl for a variable.
260   const FieldDecl *lookup(const VarDecl *VD) const override {
261     if (OuterRegionInfo)
262       return OuterRegionInfo->lookup(VD);
263     // If there is no outer outlined region,no need to lookup in a list of
264     // captured variables, we can use the original one.
265     return nullptr;
266   }
267 
268   FieldDecl *getThisFieldDecl() const override {
269     if (OuterRegionInfo)
270       return OuterRegionInfo->getThisFieldDecl();
271     return nullptr;
272   }
273 
274   /// Get a variable or parameter for storing global thread id
275   /// inside OpenMP construct.
276   const VarDecl *getThreadIDVariable() const override {
277     if (OuterRegionInfo)
278       return OuterRegionInfo->getThreadIDVariable();
279     return nullptr;
280   }
281 
282   /// Get an LValue for the current ThreadID variable.
283   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
284     if (OuterRegionInfo)
285       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
286     llvm_unreachable("No LValue for inlined OpenMP construct");
287   }
288 
289   /// Get the name of the capture helper.
290   StringRef getHelperName() const override {
291     if (auto *OuterRegionInfo = getOldCSI())
292       return OuterRegionInfo->getHelperName();
293     llvm_unreachable("No helper name for inlined OpenMP construct");
294   }
295 
296   void emitUntiedSwitch(CodeGenFunction &CGF) override {
297     if (OuterRegionInfo)
298       OuterRegionInfo->emitUntiedSwitch(CGF);
299   }
300 
301   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
302 
303   static bool classof(const CGCapturedStmtInfo *Info) {
304     return CGOpenMPRegionInfo::classof(Info) &&
305            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
306   }
307 
308   ~CGOpenMPInlinedRegionInfo() override = default;
309 
310 private:
311   /// CodeGen info about outer OpenMP region.
312   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
313   CGOpenMPRegionInfo *OuterRegionInfo;
314 };
315 
316 /// API for captured statement code generation in OpenMP target
317 /// constructs. For this captures, implicit parameters are used instead of the
318 /// captured fields. The name of the target region has to be unique in a given
319 /// application so it is provided by the client, because only the client has
320 /// the information to generate that.
321 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
322 public:
323   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
324                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
325       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
326                            /*HasCancel=*/false),
327         HelperName(HelperName) {}
328 
329   /// This is unused for target regions because each starts executing
330   /// with a single thread.
331   const VarDecl *getThreadIDVariable() const override { return nullptr; }
332 
333   /// Get the name of the capture helper.
334   StringRef getHelperName() const override { return HelperName; }
335 
336   static bool classof(const CGCapturedStmtInfo *Info) {
337     return CGOpenMPRegionInfo::classof(Info) &&
338            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
339   }
340 
341 private:
342   StringRef HelperName;
343 };
344 
/// Placeholder region code generator handed to CGOpenMPInnerExprInfo. It is
/// never expected to run; reaching it is reported as unreachable.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
348 /// API for generation of expressions captured in a innermost OpenMP
349 /// region.
350 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
351 public:
352   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
353       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
354                                   OMPD_unknown,
355                                   /*HasCancel=*/false),
356         PrivScope(CGF) {
357     // Make sure the globals captured in the provided statement are local by
358     // using the privatization logic. We assume the same variable is not
359     // captured more than once.
360     for (const auto &C : CS.captures()) {
361       if (!C.capturesVariable() && !C.capturesVariableByCopy())
362         continue;
363 
364       const VarDecl *VD = C.getCapturedVar();
365       if (VD->isLocalVarDeclOrParm())
366         continue;
367 
368       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
369                       /*RefersToEnclosingVariableOrCapture=*/false,
370                       VD->getType().getNonReferenceType(), VK_LValue,
371                       C.getLocation());
372       PrivScope.addPrivate(
373           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
374     }
375     (void)PrivScope.Privatize();
376   }
377 
378   /// Lookup the captured field decl for a variable.
379   const FieldDecl *lookup(const VarDecl *VD) const override {
380     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
381       return FD;
382     return nullptr;
383   }
384 
385   /// Emit the captured statement body.
386   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
387     llvm_unreachable("No body for expressions");
388   }
389 
390   /// Get a variable or parameter for storing global thread id
391   /// inside OpenMP construct.
392   const VarDecl *getThreadIDVariable() const override {
393     llvm_unreachable("No thread id for expressions");
394   }
395 
396   /// Get the name of the capture helper.
397   StringRef getHelperName() const override {
398     llvm_unreachable("No helper name for expressions");
399   }
400 
401   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
402 
403 private:
404   /// Private scope to capture global variables.
405   CodeGenFunction::OMPPrivateScope PrivScope;
406 };
407 
408 /// RAII for emitting code of OpenMP constructs.
409 class InlinedOpenMPRegionRAII {
410   CodeGenFunction &CGF;
411   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
412   FieldDecl *LambdaThisCaptureField = nullptr;
413   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
414   bool NoInheritance = false;
415 
416 public:
417   /// Constructs region for combined constructs.
418   /// \param CodeGen Code generation sequence for combined directives. Includes
419   /// a list of functions used for code generation of implicitly inlined
420   /// regions.
421   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
422                           OpenMPDirectiveKind Kind, bool HasCancel,
423                           bool NoInheritance = true)
424       : CGF(CGF), NoInheritance(NoInheritance) {
425     // Start emission for the construct.
426     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
427         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
428     if (NoInheritance) {
429       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
430       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
431       CGF.LambdaThisCaptureField = nullptr;
432       BlockInfo = CGF.BlockInfo;
433       CGF.BlockInfo = nullptr;
434     }
435   }
436 
437   ~InlinedOpenMPRegionRAII() {
438     // Restore original CapturedStmtInfo only if we're done with code emission.
439     auto *OldCSI =
440         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
441     delete CGF.CapturedStmtInfo;
442     CGF.CapturedStmtInfo = OldCSI;
443     if (NoInheritance) {
444       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
445       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
446       CGF.BlockInfo = BlockInfo;
447     }
448   }
449 };
450 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive. Has the same value as
  /// OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive. The value includes the
  /// OMP_IDENT_BARRIER_IMPL bit (0x40 | 0x80).
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive. The value includes the
  /// OMP_IDENT_BARRIER_IMPL bit (0x40 | 0x100).
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  // Declares the largest enumerator so the enum can be used as a bitmask.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
479 
namespace {
// Enables the bitmask operators for enums marked with
// LLVM_MARK_AS_BITMASK_ENUM in this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  // Declares the largest enumerator so the enum can be used as a bitmask.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device IDs with a reserved meaning.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
505 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below are listed in the same order as the members of the
/// structure above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
546 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions. Each OMP_ord_* value below equals
  /// the matching OMP_sch_* value plus 32.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  // NOTE(review): the modifiers are single high bits, presumably OR-ed onto
  // one of the schedule values above - confirm against the users of this enum.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
578 
579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580 /// region.
581 class CleanupTy final : public EHScopeStack::Cleanup {
582   PrePostActionTy *Action;
583 
584 public:
585   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587     if (!CGF.HaveInsertPoint())
588       return;
589     Action->Exit(CGF);
590   }
591 };
592 
593 } // anonymous namespace
594 
595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596   CodeGenFunction::RunCleanupsScope Scope(CGF);
597   if (PrePostAction) {
598     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599     Callback(CodeGen, CGF, *PrePostAction);
600   } else {
601     PrePostActionTy Action;
602     Callback(CodeGen, CGF, Action);
603   }
604 }
605 
606 /// Check if the combiner is a call to UDR combiner and if it is so return the
607 /// UDR decl used for reduction.
608 static const OMPDeclareReductionDecl *
609 getReductionInit(const Expr *ReductionOp) {
610   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612       if (const auto *DRE =
613               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615           return DRD;
616   return nullptr;
617 }
618 
619 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
620                                              const OMPDeclareReductionDecl *DRD,
621                                              const Expr *InitOp,
622                                              Address Private, Address Original,
623                                              QualType Ty) {
624   if (DRD->getInitializer()) {
625     std::pair<llvm::Function *, llvm::Function *> Reduction =
626         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
627     const auto *CE = cast<CallExpr>(InitOp);
628     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
629     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
630     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
631     const auto *LHSDRE =
632         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
633     const auto *RHSDRE =
634         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
635     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
636     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
637                             [=]() { return Private; });
638     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
639                             [=]() { return Original; });
640     (void)PrivateScope.Privatize();
641     RValue Func = RValue::get(Reduction.second);
642     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
643     CGF.EmitIgnoredExpr(InitOp);
644   } else {
645     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
646     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
647     auto *GV = new llvm::GlobalVariable(
648         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
649         llvm::GlobalValue::PrivateLinkage, Init, Name);
650     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
651     RValue InitRVal;
652     switch (CGF.getEvaluationKind(Ty)) {
653     case TEK_Scalar:
654       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
655       break;
656     case TEK_Complex:
657       InitRVal =
658           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
659       break;
660     case TEK_Aggregate: {
661       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
662       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
663       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
664                            /*IsInitializer=*/false);
665       return;
666     }
667     }
668     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
669     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
670     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
671                          /*IsInitializer=*/false);
672   }
673 }
674 
675 /// Emit initialization of arrays of complex types.
676 /// \param DestAddr Address of the array.
677 /// \param Type Type of array.
678 /// \param Init Initial expression of array.
679 /// \param SrcAddr Address of the original array.
680 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
681                                  QualType Type, bool EmitDeclareReductionInit,
682                                  const Expr *Init,
683                                  const OMPDeclareReductionDecl *DRD,
684                                  Address SrcAddr = Address::invalid()) {
685   // Perform element-by-element initialization.
686   QualType ElementTy;
687 
688   // Drill down to the base element type on both arrays.
689   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
690   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
691   DestAddr =
692       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
693   if (DRD)
694     SrcAddr =
695         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
696 
697   llvm::Value *SrcBegin = nullptr;
698   if (DRD)
699     SrcBegin = SrcAddr.getPointer();
700   llvm::Value *DestBegin = DestAddr.getPointer();
701   // Cast from pointer to array type to pointer to single element.
702   llvm::Value *DestEnd =
703       CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
704   // The basic structure here is a while-do loop.
705   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
706   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
707   llvm::Value *IsEmpty =
708       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
709   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
710 
711   // Enter the loop body, making that address the current address.
712   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
713   CGF.EmitBlock(BodyBB);
714 
715   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
716 
717   llvm::PHINode *SrcElementPHI = nullptr;
718   Address SrcElementCurrent = Address::invalid();
719   if (DRD) {
720     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
721                                           "omp.arraycpy.srcElementPast");
722     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
723     SrcElementCurrent =
724         Address(SrcElementPHI,
725                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
726   }
727   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
728       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
729   DestElementPHI->addIncoming(DestBegin, EntryBB);
730   Address DestElementCurrent =
731       Address(DestElementPHI,
732               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
733 
734   // Emit copy.
735   {
736     CodeGenFunction::RunCleanupsScope InitScope(CGF);
737     if (EmitDeclareReductionInit) {
738       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
739                                        SrcElementCurrent, ElementTy);
740     } else
741       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
742                            /*IsInitializer=*/false);
743   }
744 
745   if (DRD) {
746     // Shift the address forward by one element.
747     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
748         SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
749         "omp.arraycpy.dest.element");
750     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
751   }
752 
753   // Shift the address forward by one element.
754   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
755       DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
756       "omp.arraycpy.dest.element");
757   // Check whether we've reached the end.
758   llvm::Value *Done =
759       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
760   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
761   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
762 
763   // Done.
764   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
765 }
766 
767 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
768   return CGF.EmitOMPSharedLValue(E);
769 }
770 
771 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
772                                             const Expr *E) {
773   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
774     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
775   return LValue();
776 }
777 
778 void ReductionCodeGen::emitAggregateInitialization(
779     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
780     const OMPDeclareReductionDecl *DRD) {
781   // Emit VarDecl with copy init for arrays.
782   // Get the address of the original variable captured in current
783   // captured region.
784   const auto *PrivateVD =
785       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
786   bool EmitDeclareReductionInit =
787       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
788   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
789                        EmitDeclareReductionInit,
790                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
791                                                 : PrivateVD->getInit(),
792                        DRD, SharedLVal.getAddress(CGF));
793 }
794 
795 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
796                                    ArrayRef<const Expr *> Origs,
797                                    ArrayRef<const Expr *> Privates,
798                                    ArrayRef<const Expr *> ReductionOps) {
799   ClausesData.reserve(Shareds.size());
800   SharedAddresses.reserve(Shareds.size());
801   Sizes.reserve(Shareds.size());
802   BaseDecls.reserve(Shareds.size());
803   const auto *IOrig = Origs.begin();
804   const auto *IPriv = Privates.begin();
805   const auto *IRed = ReductionOps.begin();
806   for (const Expr *Ref : Shareds) {
807     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
808     std::advance(IOrig, 1);
809     std::advance(IPriv, 1);
810     std::advance(IRed, 1);
811   }
812 }
813 
814 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
815   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
816          "Number of generated lvalues must be exactly N.");
817   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
818   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
819   SharedAddresses.emplace_back(First, Second);
820   if (ClausesData[N].Shared == ClausesData[N].Ref) {
821     OrigAddresses.emplace_back(First, Second);
822   } else {
823     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
824     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
825     OrigAddresses.emplace_back(First, Second);
826   }
827 }
828 
829 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
830   const auto *PrivateVD =
831       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
832   QualType PrivateType = PrivateVD->getType();
833   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
834   if (!PrivateType->isVariablyModifiedType()) {
835     Sizes.emplace_back(
836         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
837         nullptr);
838     return;
839   }
840   llvm::Value *Size;
841   llvm::Value *SizeInChars;
842   auto *ElemType =
843       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
844           ->getElementType();
845   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
846   if (AsArraySection) {
847     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
848                                      OrigAddresses[N].first.getPointer(CGF));
849     Size = CGF.Builder.CreateNUWAdd(
850         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
851     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
852   } else {
853     SizeInChars =
854         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
855     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
856   }
857   Sizes.emplace_back(SizeInChars, Size);
858   CodeGenFunction::OpaqueValueMapping OpaqueMap(
859       CGF,
860       cast<OpaqueValueExpr>(
861           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
862       RValue::get(Size));
863   CGF.EmitVariablyModifiedType(PrivateType);
864 }
865 
866 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
867                                          llvm::Value *Size) {
868   const auto *PrivateVD =
869       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
870   QualType PrivateType = PrivateVD->getType();
871   if (!PrivateType->isVariablyModifiedType()) {
872     assert(!Size && !Sizes[N].second &&
873            "Size should be nullptr for non-variably modified reduction "
874            "items.");
875     return;
876   }
877   CodeGenFunction::OpaqueValueMapping OpaqueMap(
878       CGF,
879       cast<OpaqueValueExpr>(
880           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
881       RValue::get(Size));
882   CGF.EmitVariablyModifiedType(PrivateType);
883 }
884 
/// Emits the initialization of the private copy of the N-th reduction item.
///
/// \param PrivateAddr Address of the private copy; it is re-cast to the memory
///                    type of the private variable before use.
/// \param SharedLVal  LValue of the shared item; it is re-cast to the memory
///                    type of the N-th shared address before use.
/// \param DefaultInit Callback invoked to perform the default initialization.
///                    Its boolean result is only consulted in the last branch,
///                    where a true result suppresses emission of the private
///                    variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // View the private storage through the private variable's memory type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  // Rebuild the shared lvalue with the type, base info and TBAA info of the
  // recorded shared address.
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays: run the default init only when the declare-reduction provides an
    // initializer, then initialize element by element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item initialized through the declare-reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit reported false: fall back to the private variable's own
    // non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
918 
919 bool ReductionCodeGen::needCleanups(unsigned N) {
920   const auto *PrivateVD =
921       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922   QualType PrivateType = PrivateVD->getType();
923   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
924   return DTorKind != QualType::DK_none;
925 }
926 
927 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
928                                     Address PrivateAddr) {
929   const auto *PrivateVD =
930       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
931   QualType PrivateType = PrivateVD->getType();
932   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
933   if (needCleanups(N)) {
934     PrivateAddr = CGF.Builder.CreateElementBitCast(
935         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
936     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
937   }
938 }
939 
940 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
941                           LValue BaseLV) {
942   BaseTy = BaseTy.getNonReferenceType();
943   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
944          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
945     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
946       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
947     } else {
948       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
949       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
950     }
951     BaseTy = BaseTy->getPointeeType();
952   }
953   return CGF.MakeAddrLValue(
954       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
955                                        CGF.ConvertTypeForMem(ElTy)),
956       BaseLV.getType(), BaseLV.getBaseInfo(),
957       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
958 }
959 
/// Wraps the pointer \p Addr so that it can be accessed through the same
/// chain of pointer/reference levels as \p BaseTy.
///
/// For every pointer or reference level of \p BaseTy that has to be peeled to
/// reach \p ElTy a memory temporary is created; each temporary stores the
/// address of the next one and the innermost temporary stores \p Addr. The
/// outermost temporary is returned. When no level has to be peeled, \p Addr is
/// simply cast to \p BaseLVType and returned with alignment
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary created in the current iteration (innermost so far).
  // TopTmp: temporary created in the previous iteration.
  // MostTopTmp: the very first (outermost) temporary.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the previous level to the new one, or remember the outermost level.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to what the innermost temporary holds, or to the base lvalue
  // type when no temporary was needed.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
987 
988 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
989   const VarDecl *OrigVD = nullptr;
990   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
991     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
992     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
993       Base = TempOASE->getBase()->IgnoreParenImpCasts();
994     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
995       Base = TempASE->getBase()->IgnoreParenImpCasts();
996     DE = cast<DeclRefExpr>(Base);
997     OrigVD = cast<VarDecl>(DE->getDecl());
998   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
999     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1000     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1001       Base = TempASE->getBase()->IgnoreParenImpCasts();
1002     DE = cast<DeclRefExpr>(Base);
1003     OrigVD = cast<VarDecl>(DE->getDecl());
1004   }
1005   return OrigVD;
1006 }
1007 
1008 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1009                                                Address PrivateAddr) {
1010   const DeclRefExpr *DE;
1011   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1012     BaseDecls.emplace_back(OrigVD);
1013     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1014     LValue BaseLValue =
1015         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1016                     OriginalBaseLValue);
1017     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1018     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1019         BaseLValue.getPointer(CGF), SharedAddr.getPointer());
1020     llvm::Value *PrivatePointer =
1021         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1022             PrivateAddr.getPointer(), SharedAddr.getType());
1023     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1024         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1025     return castToBase(CGF, OrigVD->getType(),
1026                       SharedAddresses[N].first.getType(),
1027                       OriginalBaseLValue.getAddress(CGF).getType(),
1028                       OriginalBaseLValue.getAlignment(), Ptr);
1029   }
1030   BaseDecls.emplace_back(
1031       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1032   return PrivateAddr;
1033 }
1034 
1035 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1036   const OMPDeclareReductionDecl *DRD =
1037       getReductionInit(ClausesData[N].ReductionOp);
1038   return DRD && DRD->getInitializer();
1039 }
1040 
1041 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1042   return CGF.EmitLoadOfPointerLValue(
1043       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1044       getThreadIDVariable()->getType()->castAs<PointerType>());
1045 }
1046 
1047 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1048   if (!CGF.HaveInsertPoint())
1049     return;
1050   // 1.2.2 OpenMP Language Terminology
1051   // Structured block - An executable statement with a single entry at the
1052   // top and a single exit at the bottom.
1053   // The point of exit cannot be a branch out of the structured block.
1054   // longjmp() and throw() must not violate the entry/exit criteria.
1055   CGF.EHStack.pushTerminate();
1056   if (S)
1057     CGF.incrementProfileCounter(S);
1058   CodeGen(CGF);
1059   CGF.EHStack.popTerminate();
1060 }
1061 
1062 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1063     CodeGenFunction &CGF) {
1064   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1065                             getThreadIDVariable()->getType(),
1066                             AlignmentSource::Decl);
1067 }
1068 
1069 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1070                                        QualType FieldTy) {
1071   auto *Field = FieldDecl::Create(
1072       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1073       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1074       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1075   Field->setAccess(AS_public);
1076   DC->addDecl(Field);
1077   return Field;
1078 }
1079 
1080 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1081                                  StringRef Separator)
1082     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1083       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1084   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1085 
1086   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1087   OMPBuilder.initialize();
1088   loadOffloadInfoMetadata();
1089 }
1090 
1091 void CGOpenMPRuntime::clear() {
1092   InternalVars.clear();
1093   // Clean non-target variable declarations possibly used only in debug info.
1094   for (const auto &Data : EmittedNonTargetVariables) {
1095     if (!Data.getValue().pointsToAliveValue())
1096       continue;
1097     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1098     if (!GV)
1099       continue;
1100     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1101       continue;
1102     GV->eraseFromParent();
1103   }
1104 }
1105 
1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1107   SmallString<128> Buffer;
1108   llvm::raw_svector_ostream OS(Buffer);
1109   StringRef Sep = FirstSeparator;
1110   for (StringRef Part : Parts) {
1111     OS << Sep << Part;
1112     Sep = Separator;
1113   }
1114   return std::string(OS.str());
1115 }
1116 
/// Emits an internal helper function for a user-defined reduction: either its
/// combiner or its initializer.
///
/// \param Ty                  Type of the reduction item; both parameters of
///                            the helper are restrict-qualified pointers to
///                            it.
/// \param CombinerInitializer Expression emitted as the body of the helper;
///                            may be null, in which case no expression is
///                            emitted.
/// \param In                  Variable mapped to the pointee of the second
///                            parameter.
/// \param Out                 Variable mapped to the pointee of the first
///                            parameter.
/// \param IsCombiner          Selects the helper name; when false, a
///                            non-trivial initializer of \p Out is emitted
///                            before \p CombinerInitializer.
/// \returns The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note that the "out" parameter is pushed first and the "in" parameter
  // second.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace noinline/optnone with alwaysinline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, emit the non-trivial initializer of the "out" variable
  // into its (privatized) storage first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1173 
1174 void CGOpenMPRuntime::emitUserDefinedReduction(
1175     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1176   if (UDRMap.count(D) > 0)
1177     return;
1178   llvm::Function *Combiner = emitCombinerOrInitializer(
1179       CGM, D->getType(), D->getCombiner(),
1180       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1181       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1182       /*IsCombiner=*/true);
1183   llvm::Function *Initializer = nullptr;
1184   if (const Expr *Init = D->getInitializer()) {
1185     Initializer = emitCombinerOrInitializer(
1186         CGM, D->getType(),
1187         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1188                                                                      : nullptr,
1189         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1190         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1191         /*IsCombiner=*/false);
1192   }
1193   UDRMap.try_emplace(D, Combiner, Initializer);
1194   if (CGF) {
1195     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1196     Decls.second.push_back(D);
1197   }
1198 }
1199 
1200 std::pair<llvm::Function *, llvm::Function *>
1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1202   auto I = UDRMap.find(D);
1203   if (I != UDRMap.end())
1204     return I->second;
1205   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1206   return UDRMap.lookup(D);
1207 }
1208 
1209 namespace {
1210 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1211 // Builder if one is present.
1212 struct PushAndPopStackRAII {
1213   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1214                       bool HasCancel, llvm::omp::Directive Kind)
1215       : OMPBuilder(OMPBuilder) {
1216     if (!OMPBuilder)
1217       return;
1218 
1219     // The following callback is the crucial part of clangs cleanup process.
1220     //
1221     // NOTE:
1222     // Once the OpenMPIRBuilder is used to create parallel regions (and
1223     // similar), the cancellation destination (Dest below) is determined via
1224     // IP. That means if we have variables to finalize we split the block at IP,
1225     // use the new block (=BB) as destination to build a JumpDest (via
1226     // getJumpDestInCurrentScope(BB)) which then is fed to
1227     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1228     // to push & pop an FinalizationInfo object.
1229     // The FiniCB will still be needed but at the point where the
1230     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1231     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1232       assert(IP.getBlock()->end() == IP.getPoint() &&
1233              "Clang CG should cause non-terminated block!");
1234       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1235       CGF.Builder.restoreIP(IP);
1236       CodeGenFunction::JumpDest Dest =
1237           CGF.getOMPCancelDestination(OMPD_parallel);
1238       CGF.EmitBranchThroughCleanup(Dest);
1239     };
1240 
1241     // TODO: Remove this once we emit parallel regions through the
1242     //       OpenMPIRBuilder as it can do this setup internally.
1243     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1244     OMPBuilder->pushFinalizationCB(std::move(FI));
1245   }
1246   ~PushAndPopStackRAII() {
1247     if (OMPBuilder)
1248       OMPBuilder->popFinalizationCB();
1249   }
1250   llvm::OpenMPIRBuilder *OMPBuilder;
1251 };
1252 } // namespace
1253 
1254 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1255     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1256     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1257     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1258   assert(ThreadIDVar->getType()->isPointerType() &&
1259          "thread id variable must be of type kmp_int32 *");
1260   CodeGenFunction CGF(CGM, true);
1261   bool HasCancel = false;
1262   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1263     HasCancel = OPD->hasCancel();
1264   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1265     HasCancel = OPD->hasCancel();
1266   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1267     HasCancel = OPSD->hasCancel();
1268   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1269     HasCancel = OPFD->hasCancel();
1270   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1273     HasCancel = OPFD->hasCancel();
1274   else if (const auto *OPFD =
1275                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1276     HasCancel = OPFD->hasCancel();
1277   else if (const auto *OPFD =
1278                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1279     HasCancel = OPFD->hasCancel();
1280 
1281   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1282   //       parallel region to make cancellation barriers work properly.
1283   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1284   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1285   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1286                                     HasCancel, OutlinedHelperName);
1287   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1288   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1289 }
1290 
1291 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1292     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1294   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1295   return emitParallelOrTeamsOutlinedFunction(
1296       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1297 }
1298 
1299 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1300     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1301     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1302   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1303   return emitParallelOrTeamsOutlinedFunction(
1304       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1305 }
1306 
1307 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1308     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1309     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1310     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1311     bool Tied, unsigned &NumberOfParts) {
1312   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1313                                               PrePostActionTy &) {
1314     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1315     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1316     llvm::Value *TaskArgs[] = {
1317         UpLoc, ThreadID,
1318         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1319                                     TaskTVar->getType()->castAs<PointerType>())
1320             .getPointer(CGF)};
1321     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1322                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1323                         TaskArgs);
1324   };
1325   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1326                                                             UntiedCodeGen);
1327   CodeGen.setAction(Action);
1328   assert(!ThreadIDVar->getType()->isPointerType() &&
1329          "thread id variable must be of type kmp_int32 for tasks");
1330   const OpenMPDirectiveKind Region =
1331       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1332                                                       : OMPD_task;
1333   const CapturedStmt *CS = D.getCapturedStmt(Region);
1334   bool HasCancel = false;
1335   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1336     HasCancel = TD->hasCancel();
1337   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1338     HasCancel = TD->hasCancel();
1339   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1340     HasCancel = TD->hasCancel();
1341   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1342     HasCancel = TD->hasCancel();
1343 
1344   CodeGenFunction CGF(CGM, true);
1345   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1346                                         InnermostKind, HasCancel, Action);
1347   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1348   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1349   if (!Tied)
1350     NumberOfParts = Action.getNumberOfParts();
1351   return Res;
1352 }
1353 
1354 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1355                              const RecordDecl *RD, const CGRecordLayout &RL,
1356                              ArrayRef<llvm::Constant *> Data) {
1357   llvm::StructType *StructTy = RL.getLLVMType();
1358   unsigned PrevIdx = 0;
1359   ConstantInitBuilder CIBuilder(CGM);
1360   auto DI = Data.begin();
1361   for (const FieldDecl *FD : RD->fields()) {
1362     unsigned Idx = RL.getLLVMFieldNo(FD);
1363     // Fill the alignment.
1364     for (unsigned I = PrevIdx; I < Idx; ++I)
1365       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1366     PrevIdx = Idx + 1;
1367     Fields.add(*DI);
1368     ++DI;
1369   }
1370 }
1371 
1372 template <class... As>
1373 static llvm::GlobalVariable *
1374 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1375                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1376                    As &&... Args) {
1377   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1378   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1379   ConstantInitBuilder CIBuilder(CGM);
1380   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1381   buildStructValue(Fields, CGM, RD, RL, Data);
1382   return Fields.finishAndCreateGlobal(
1383       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1384       std::forward<As>(Args)...);
1385 }
1386 
1387 template <typename T>
1388 static void
1389 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1390                                          ArrayRef<llvm::Constant *> Data,
1391                                          T &Parent) {
1392   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1393   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1394   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1395   buildStructValue(Fields, CGM, RD, RL, Data);
1396   Fields.finishAndAddTo(Parent);
1397 }
1398 
1399 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1400                                              bool AtCurrentPoint) {
1401   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1402   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1403 
1404   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1405   if (AtCurrentPoint) {
1406     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1407         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1408   } else {
1409     Elem.second.ServiceInsertPt =
1410         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1411     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1412   }
1413 }
1414 
1415 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1416   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1417   if (Elem.second.ServiceInsertPt) {
1418     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1419     Elem.second.ServiceInsertPt = nullptr;
1420     Ptr->eraseFromParent();
1421   }
1422 }
1423 
1424 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1425                                                   SourceLocation Loc,
1426                                                   SmallString<128> &Buffer) {
1427   llvm::raw_svector_ostream OS(Buffer);
1428   // Build debug location
1429   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1430   OS << ";" << PLoc.getFilename() << ";";
1431   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1432     OS << FD->getQualifiedNameAsString();
1433   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1434   return OS.str();
1435 }
1436 
1437 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1438                                                  SourceLocation Loc,
1439                                                  unsigned Flags) {
1440   llvm::Constant *SrcLocStr;
1441   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1442       Loc.isInvalid()) {
1443     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1444   } else {
1445     std::string FunctionName = "";
1446     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1447       FunctionName = FD->getQualifiedNameAsString();
1448     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1449     const char *FileName = PLoc.getFilename();
1450     unsigned Line = PLoc.getLine();
1451     unsigned Column = PLoc.getColumn();
1452     SrcLocStr =
1453         OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1454   }
1455   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1456   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1457                                      Reserved2Flags);
1458 }
1459 
/// Returns a value holding the OpenMP global thread id for use at the current
/// insertion point of \p CGF.
///
/// Sources, in order of preference:
///  1. With the OpenMPIRBuilder enabled, the id is always obtained through
///     OMPBuilder.
///  2. A value already cached for CGF.CurFn in OpenMPLocThreadIDMap.
///  3. A load from the region's thread id variable, when the current captured
///     region provides one and the load is considered safe (see below).
///  4. A call to __kmpc_global_thread_num emitted at the function's service
///     insert point and cached for the rest of the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable only if at least one of these holds:
      // no landing pad is required, (C++) exceptions are disabled, we are
      // emitting into the entry block, the variable's address is not an
      // instruction, or that instruction lives in the entry block or in the
      // current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  // Lazily create the service insert point marker for this function.
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard restores the
  // builder's previous insertion point when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1527 
1528 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1529   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1530   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1531     clearLocThreadIdInsertPt(CGF);
1532     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1533   }
1534   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1535     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1536       UDRMap.erase(D);
1537     FunctionUDRMap.erase(CGF.CurFn);
1538   }
1539   auto I = FunctionUDMMap.find(CGF.CurFn);
1540   if (I != FunctionUDMMap.end()) {
1541     for(const auto *D : I->second)
1542       UDMMap.erase(D);
1543     FunctionUDMMap.erase(I);
1544   }
1545   LastprivateConditionalToTypes.erase(CGF.CurFn);
1546   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1547 }
1548 
1549 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1550   return OMPBuilder.IdentPtr;
1551 }
1552 
1553 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1554   if (!Kmpc_MicroTy) {
1555     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1556     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1557                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1558     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1559   }
1560   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1561 }
1562 
1563 llvm::FunctionCallee
1564 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1565                                              bool IsGPUDistribute) {
1566   assert((IVSize == 32 || IVSize == 64) &&
1567          "IV size is not compatible with the omp runtime");
1568   StringRef Name;
1569   if (IsGPUDistribute)
1570     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1571                                     : "__kmpc_distribute_static_init_4u")
1572                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1573                                     : "__kmpc_distribute_static_init_8u");
1574   else
1575     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1576                                     : "__kmpc_for_static_init_4u")
1577                         : (IVSigned ? "__kmpc_for_static_init_8"
1578                                     : "__kmpc_for_static_init_8u");
1579 
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582   llvm::Type *TypeParams[] = {
1583     getIdentTyPointerTy(),                     // loc
1584     CGM.Int32Ty,                               // tid
1585     CGM.Int32Ty,                               // schedtype
1586     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1587     PtrTy,                                     // p_lower
1588     PtrTy,                                     // p_upper
1589     PtrTy,                                     // p_stride
1590     ITy,                                       // incr
1591     ITy                                        // chunk
1592   };
1593   auto *FnTy =
1594       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1595   return CGM.CreateRuntimeFunction(FnTy, Name);
1596 }
1597 
1598 llvm::FunctionCallee
1599 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1600   assert((IVSize == 32 || IVSize == 64) &&
1601          "IV size is not compatible with the omp runtime");
1602   StringRef Name =
1603       IVSize == 32
1604           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1605           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1606   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1607   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1608                                CGM.Int32Ty,           // tid
1609                                CGM.Int32Ty,           // schedtype
1610                                ITy,                   // lower
1611                                ITy,                   // upper
1612                                ITy,                   // stride
1613                                ITy                    // chunk
1614   };
1615   auto *FnTy =
1616       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1617   return CGM.CreateRuntimeFunction(FnTy, Name);
1618 }
1619 
1620 llvm::FunctionCallee
1621 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1622   assert((IVSize == 32 || IVSize == 64) &&
1623          "IV size is not compatible with the omp runtime");
1624   StringRef Name =
1625       IVSize == 32
1626           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1627           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1628   llvm::Type *TypeParams[] = {
1629       getIdentTyPointerTy(), // loc
1630       CGM.Int32Ty,           // tid
1631   };
1632   auto *FnTy =
1633       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1634   return CGM.CreateRuntimeFunction(FnTy, Name);
1635 }
1636 
1637 llvm::FunctionCallee
1638 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1639   assert((IVSize == 32 || IVSize == 64) &&
1640          "IV size is not compatible with the omp runtime");
1641   StringRef Name =
1642       IVSize == 32
1643           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1644           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1645   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1646   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1647   llvm::Type *TypeParams[] = {
1648     getIdentTyPointerTy(),                     // loc
1649     CGM.Int32Ty,                               // tid
1650     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1651     PtrTy,                                     // p_lower
1652     PtrTy,                                     // p_upper
1653     PtrTy                                      // p_stride
1654   };
1655   auto *FnTy =
1656       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1657   return CGM.CreateRuntimeFunction(FnTy, Name);
1658 }
1659 
1660 /// Obtain information that uniquely identifies a target entry. This
1661 /// consists of the file and device IDs as well as line number associated with
1662 /// the relevant entry source location.
1663 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1664                                      unsigned &DeviceID, unsigned &FileID,
1665                                      unsigned &LineNum) {
1666   SourceManager &SM = C.getSourceManager();
1667 
1668   // The loc should be always valid and have a file ID (the user cannot use
1669   // #pragma directives in macros)
1670 
1671   assert(Loc.isValid() && "Source location is expected to be always valid.");
1672 
1673   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1674   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1675 
1676   llvm::sys::fs::UniqueID ID;
1677   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1678     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1679     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1680     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1681       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1682           << PLoc.getFilename() << EC.message();
1683   }
1684 
1685   DeviceID = ID.getDevice();
1686   FileID = ID.getFile();
1687   LineNum = PLoc.getLine();
1688 }
1689 
1690 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1691   if (CGM.getLangOpts().OpenMPSimd)
1692     return Address::invalid();
1693   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1694       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1695   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1696               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1697                HasRequiresUnifiedSharedMemory))) {
1698     SmallString<64> PtrName;
1699     {
1700       llvm::raw_svector_ostream OS(PtrName);
1701       OS << CGM.getMangledName(GlobalDecl(VD));
1702       if (!VD->isExternallyVisible()) {
1703         unsigned DeviceID, FileID, Line;
1704         getTargetEntryUniqueInfo(CGM.getContext(),
1705                                  VD->getCanonicalDecl()->getBeginLoc(),
1706                                  DeviceID, FileID, Line);
1707         OS << llvm::format("_%x", FileID);
1708       }
1709       OS << "_decl_tgt_ref_ptr";
1710     }
1711     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1712     if (!Ptr) {
1713       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1714       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1715                                         PtrName);
1716 
1717       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1718       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1719 
1720       if (!CGM.getLangOpts().OpenMPIsDevice)
1721         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1722       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1723     }
1724     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1725   }
1726   return Address::invalid();
1727 }
1728 
1729 llvm::Constant *
1730 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1731   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1732          !CGM.getContext().getTargetInfo().isTLSSupported());
1733   // Lookup the entry, lazily creating it if necessary.
1734   std::string Suffix = getName({"cache", ""});
1735   return getOrCreateInternalVariable(
1736       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1737 }
1738 
1739 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1740                                                 const VarDecl *VD,
1741                                                 Address VDAddr,
1742                                                 SourceLocation Loc) {
1743   if (CGM.getLangOpts().OpenMPUseTLS &&
1744       CGM.getContext().getTargetInfo().isTLSSupported())
1745     return VDAddr;
1746 
1747   llvm::Type *VarTy = VDAddr.getElementType();
1748   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1749                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1750                                                        CGM.Int8PtrTy),
1751                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1752                          getOrCreateThreadPrivateCache(VD)};
1753   return Address(CGF.EmitRuntimeCall(
1754                      OMPBuilder.getOrCreateRuntimeFunction(
1755                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1756                      Args),
1757                  VDAddr.getAlignment());
1758 }
1759 
1760 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1761     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1762     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1763   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1764   // library.
1765   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1766   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1767                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1768                       OMPLoc);
1769   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1770   // to register constructor/destructor for variable.
1771   llvm::Value *Args[] = {
1772       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1773       Ctor, CopyCtor, Dtor};
1774   CGF.EmitRuntimeCall(
1775       OMPBuilder.getOrCreateRuntimeFunction(
1776           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1777       Args);
1778 }
1779 
/// Emits the constructor/destructor helpers for the threadprivate variable
/// \p VD and registers them with the OpenMP runtime.
///
/// \param VDAddr Address of the original variable.
/// \param PerformInit If true (and compiling C++), a constructor helper that
/// re-runs the variable's initializer is generated.
/// \param CGF If non-null, the registration calls are emitted into this
/// function; if null, a standalone init function is created for them.
/// \returns The newly created init function when \p CGF is null and at least
/// one helper was generated. Returns nullptr when native TLS is used, when
/// \p VD has no definition or was already processed, when neither helper is
/// needed, or when the registration was emitted into \p CGF.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is performed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each defined variable only once, keyed by its mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check; this
    // assumes PerformInit is only set for variables with an initializer -
    // TODO confirm against callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape: void *ctor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to the
      // variable's memory type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and return it from the helper.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape: void dtor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      // Start the function under an empty debug location.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A helper that was not generated is passed as a typed null function
    // pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration calls in a
      // dedicated void() init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1899 
/// Emits and registers the offload entries for the constructor and destructor
/// of the declare target variable \p VD, whose global is \p Addr.
///
/// On the device, real nullary ctor/dtor functions operating on \p Addr are
/// generated and marked as used. On the host, private constant i8
/// placeholders with the same names are created instead. In both cases the
/// entry is registered with OffloadEntriesInfoManager.
///
/// \param PerformInit If true (and compiling C++), the constructor entry is
/// emitted.
/// \returns false when there are no offload target triples and this is not a
/// device compilation; otherwise the value of LangOpts.OpenMPIsDevice.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for variables that are not declare target, are mapped with
  // 'link', or are mapped with 'to' under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable only once, keyed by its mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // NOTE(review): the literal prefix ends in '_' and the first format starts
    // with '_', so the resulting name contains a double underscore before the
    // device ID - confirm this is intended.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  // NOTE(review): Init is dereferenced in the device branch without a null
  // check; this assumes PerformInit implies an initializer - TODO confirm.
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the device global Addr of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // Start the function under an empty debug location, then switch to an
      // artificial location for its body.
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the function as used so it is kept in the module.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 placeholder stands in for the
      // constructor and doubles as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the device global
      // Addr of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the function as used so it is kept in the module.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 placeholder stands in for the
      // destructor and doubles as the entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2014 
2015 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2016                                                           QualType VarType,
2017                                                           StringRef Name) {
2018   std::string Suffix = getName({"artificial", ""});
2019   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2020   llvm::Value *GAddr =
2021       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2022   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2023       CGM.getTarget().isTLSSupported()) {
2024     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2025     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2026   }
2027   std::string CacheSuffix = getName({"cache", ""});
2028   llvm::Value *Args[] = {
2029       emitUpdateLocation(CGF, SourceLocation()),
2030       getThreadID(CGF, SourceLocation()),
2031       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2032       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2033                                 /*isSigned=*/false),
2034       getOrCreateInternalVariable(
2035           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2036   return Address(
2037       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2038           CGF.EmitRuntimeCall(
2039               OMPBuilder.getOrCreateRuntimeFunction(
2040                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2041               Args),
2042           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2043       CGM.getContext().getTypeAlignInChars(VarType));
2044 }
2045 
2046 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2047                                    const RegionCodeGenTy &ThenGen,
2048                                    const RegionCodeGenTy &ElseGen) {
2049   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2050 
2051   // If the condition constant folds and can be elided, try to avoid emitting
2052   // the condition and the dead arm of the if/else.
2053   bool CondConstant;
2054   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2055     if (CondConstant)
2056       ThenGen(CGF);
2057     else
2058       ElseGen(CGF);
2059     return;
2060   }
2061 
2062   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2063   // emit the conditional branch.
2064   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2065   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2066   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2067   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2068 
2069   // Emit the 'then' code.
2070   CGF.EmitBlock(ThenBlock);
2071   ThenGen(CGF);
2072   CGF.EmitBranch(ContBlock);
2073   // Emit the 'else' code if present.
2074   // There is no need to emit line number for unconditional branch.
2075   (void)ApplyDebugLocation::CreateEmpty(CGF);
2076   CGF.EmitBlock(ElseBlock);
2077   ElseGen(CGF);
2078   // There is no need to emit line number for unconditional branch.
2079   (void)ApplyDebugLocation::CreateEmpty(CGF);
2080   CGF.EmitBranch(ContBlock);
2081   // Emit the continuation block for code after the if.
2082   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2083 }
2084 
2085 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2086                                        llvm::Function *OutlinedFn,
2087                                        ArrayRef<llvm::Value *> CapturedVars,
2088                                        const Expr *IfCond,
2089                                        llvm::Value *NumThreads) {
2090   if (!CGF.HaveInsertPoint())
2091     return;
2092   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2093   auto &M = CGM.getModule();
2094   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2095                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2096     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2097     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2098     llvm::Value *Args[] = {
2099         RTLoc,
2100         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2101         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2102     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2103     RealArgs.append(std::begin(Args), std::end(Args));
2104     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2105 
2106     llvm::FunctionCallee RTLFn =
2107         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2108     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2109   };
2110   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2111                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2112     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2113     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2114     // Build calls:
2115     // __kmpc_serialized_parallel(&Loc, GTid);
2116     llvm::Value *Args[] = {RTLoc, ThreadID};
2117     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2118                             M, OMPRTL___kmpc_serialized_parallel),
2119                         Args);
2120 
2121     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2122     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2123     Address ZeroAddrBound =
2124         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2125                                          /*Name=*/".bound.zero.addr");
2126     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2127     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2128     // ThreadId for serialized parallels is 0.
2129     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2130     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2131     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2132 
2133     // Ensure we do not inline the function. This is trivially true for the ones
2134     // passed to __kmpc_fork_call but the ones called in serialized regions
2135     // could be inlined. This is not a perfect but it is closer to the invariant
2136     // we want, namely, every data environment starts with a new function.
2137     // TODO: We should pass the if condition to the runtime function and do the
2138     //       handling there. Much cleaner code.
2139     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2140     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2141     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2142 
2143     // __kmpc_end_serialized_parallel(&Loc, GTid);
2144     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2145     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2146                             M, OMPRTL___kmpc_end_serialized_parallel),
2147                         EndArgs);
2148   };
2149   if (IfCond) {
2150     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2151   } else {
2152     RegionCodeGenTy ThenRCG(ThenGen);
2153     ThenRCG(CGF);
2154   }
2155 }
2156 
2157 // If we're inside an (outlined) parallel region, use the region info's
2158 // thread-ID variable (it is passed in a first argument of the outlined function
2159 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2160 // regular serial code region, get thread ID by calling kmp_int32
2161 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2162 // return the address of that temp.
2163 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2164                                              SourceLocation Loc) {
2165   if (auto *OMPRegionInfo =
2166           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2167     if (OMPRegionInfo->getThreadIDVariable())
2168       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2169 
2170   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2171   QualType Int32Ty =
2172       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2173   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2174   CGF.EmitStoreOfScalar(ThreadID,
2175                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2176 
2177   return ThreadIDTemp;
2178 }
2179 
2180 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2181     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2182   SmallString<256> Buffer;
2183   llvm::raw_svector_ostream Out(Buffer);
2184   Out << Name;
2185   StringRef RuntimeName = Out.str();
2186   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2187   if (Elem.second) {
2188     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2189            "OMP internal variable has different type than requested");
2190     return &*Elem.second;
2191   }
2192 
2193   return Elem.second = new llvm::GlobalVariable(
2194              CGM.getModule(), Ty, /*IsConstant*/ false,
2195              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2196              Elem.first(), /*InsertBefore=*/nullptr,
2197              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2198 }
2199 
2200 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2201   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2202   std::string Name = getName({Prefix, "var"});
2203   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2204 }
2205 
2206 namespace {
2207 /// Common pre(post)-action for different OpenMP constructs.
2208 class CommonActionTy final : public PrePostActionTy {
2209   llvm::FunctionCallee EnterCallee;
2210   ArrayRef<llvm::Value *> EnterArgs;
2211   llvm::FunctionCallee ExitCallee;
2212   ArrayRef<llvm::Value *> ExitArgs;
2213   bool Conditional;
2214   llvm::BasicBlock *ContBlock = nullptr;
2215 
2216 public:
2217   CommonActionTy(llvm::FunctionCallee EnterCallee,
2218                  ArrayRef<llvm::Value *> EnterArgs,
2219                  llvm::FunctionCallee ExitCallee,
2220                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2221       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2222         ExitArgs(ExitArgs), Conditional(Conditional) {}
2223   void Enter(CodeGenFunction &CGF) override {
2224     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2225     if (Conditional) {
2226       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2227       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2228       ContBlock = CGF.createBasicBlock("omp_if.end");
2229       // Generate the branch (If-stmt)
2230       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2231       CGF.EmitBlock(ThenBlock);
2232     }
2233   }
2234   void Done(CodeGenFunction &CGF) {
2235     // Emit the rest of blocks/branches
2236     CGF.EmitBranch(ContBlock);
2237     CGF.EmitBlock(ContBlock, true);
2238   }
2239   void Exit(CodeGenFunction &CGF) override {
2240     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2241   }
2242 };
2243 } // anonymous namespace
2244 
2245 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2246                                          StringRef CriticalName,
2247                                          const RegionCodeGenTy &CriticalOpGen,
2248                                          SourceLocation Loc, const Expr *Hint) {
2249   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2250   // CriticalOpGen();
2251   // __kmpc_end_critical(ident_t *, gtid, Lock);
2252   // Prepare arguments and build a call to __kmpc_critical
2253   if (!CGF.HaveInsertPoint())
2254     return;
2255   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2256                          getCriticalRegionLock(CriticalName)};
2257   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2258                                                 std::end(Args));
2259   if (Hint) {
2260     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2261         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2262   }
2263   CommonActionTy Action(
2264       OMPBuilder.getOrCreateRuntimeFunction(
2265           CGM.getModule(),
2266           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2267       EnterArgs,
2268       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2269                                             OMPRTL___kmpc_end_critical),
2270       Args);
2271   CriticalOpGen.setAction(Action);
2272   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2273 }
2274 
2275 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2276                                        const RegionCodeGenTy &MasterOpGen,
2277                                        SourceLocation Loc) {
2278   if (!CGF.HaveInsertPoint())
2279     return;
2280   // if(__kmpc_master(ident_t *, gtid)) {
2281   //   MasterOpGen();
2282   //   __kmpc_end_master(ident_t *, gtid);
2283   // }
2284   // Prepare arguments and build a call to __kmpc_master
2285   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2286   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2287                             CGM.getModule(), OMPRTL___kmpc_master),
2288                         Args,
2289                         OMPBuilder.getOrCreateRuntimeFunction(
2290                             CGM.getModule(), OMPRTL___kmpc_end_master),
2291                         Args,
2292                         /*Conditional=*/true);
2293   MasterOpGen.setAction(Action);
2294   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2295   Action.Done(CGF);
2296 }
2297 
2298 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2299                                        const RegionCodeGenTy &MaskedOpGen,
2300                                        SourceLocation Loc, const Expr *Filter) {
2301   if (!CGF.HaveInsertPoint())
2302     return;
2303   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2304   //   MaskedOpGen();
2305   //   __kmpc_end_masked(iden_t *, gtid);
2306   // }
2307   // Prepare arguments and build a call to __kmpc_masked
2308   llvm::Value *FilterVal = Filter
2309                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2310                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2311   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2312                          FilterVal};
2313   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2314                             getThreadID(CGF, Loc)};
2315   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2316                             CGM.getModule(), OMPRTL___kmpc_masked),
2317                         Args,
2318                         OMPBuilder.getOrCreateRuntimeFunction(
2319                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2320                         ArgsEnd,
2321                         /*Conditional=*/true);
2322   MaskedOpGen.setAction(Action);
2323   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2324   Action.Done(CGF);
2325 }
2326 
2327 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2328                                         SourceLocation Loc) {
2329   if (!CGF.HaveInsertPoint())
2330     return;
2331   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2332     OMPBuilder.createTaskyield(CGF.Builder);
2333   } else {
2334     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2335     llvm::Value *Args[] = {
2336         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2337         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2338     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2339                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2340                         Args);
2341   }
2342 
2343   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2344     Region->emitUntiedSwitch(CGF);
2345 }
2346 
2347 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2348                                           const RegionCodeGenTy &TaskgroupOpGen,
2349                                           SourceLocation Loc) {
2350   if (!CGF.HaveInsertPoint())
2351     return;
2352   // __kmpc_taskgroup(ident_t *, gtid);
2353   // TaskgroupOpGen();
2354   // __kmpc_end_taskgroup(ident_t *, gtid);
2355   // Prepare arguments and build a call to __kmpc_taskgroup
2356   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2357   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2358                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2359                         Args,
2360                         OMPBuilder.getOrCreateRuntimeFunction(
2361                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2362                         Args);
2363   TaskgroupOpGen.setAction(Action);
2364   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2365 }
2366 
2367 /// Given an array of pointers to variables, project the address of a
2368 /// given variable.
2369 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2370                                       unsigned Index, const VarDecl *Var) {
2371   // Pull out the pointer to the variable.
2372   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2373   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2374 
2375   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2376   Addr = CGF.Builder.CreateElementBitCast(
2377       Addr, CGF.ConvertTypeForMem(Var->getType()));
2378   return Addr;
2379 }
2380 
2381 static llvm::Value *emitCopyprivateCopyFunction(
2382     CodeGenModule &CGM, llvm::Type *ArgsType,
2383     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2384     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2385     SourceLocation Loc) {
2386   ASTContext &C = CGM.getContext();
2387   // void copy_func(void *LHSArg, void *RHSArg);
2388   FunctionArgList Args;
2389   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2390                            ImplicitParamDecl::Other);
2391   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2392                            ImplicitParamDecl::Other);
2393   Args.push_back(&LHSArg);
2394   Args.push_back(&RHSArg);
2395   const auto &CGFI =
2396       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2397   std::string Name =
2398       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2399   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2400                                     llvm::GlobalValue::InternalLinkage, Name,
2401                                     &CGM.getModule());
2402   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2403   Fn->setDoesNotRecurse();
2404   CodeGenFunction CGF(CGM);
2405   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2406   // Dest = (void*[n])(LHSArg);
2407   // Src = (void*[n])(RHSArg);
2408   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2409       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2410       ArgsType), CGF.getPointerAlign());
2411   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2412       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2413       ArgsType), CGF.getPointerAlign());
2414   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2415   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2416   // ...
2417   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2418   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2419     const auto *DestVar =
2420         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2421     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2422 
2423     const auto *SrcVar =
2424         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2425     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2426 
2427     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2428     QualType Type = VD->getType();
2429     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2430   }
2431   CGF.FinishFunction();
2432   return Fn;
2433 }
2434 
2435 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2436                                        const RegionCodeGenTy &SingleOpGen,
2437                                        SourceLocation Loc,
2438                                        ArrayRef<const Expr *> CopyprivateVars,
2439                                        ArrayRef<const Expr *> SrcExprs,
2440                                        ArrayRef<const Expr *> DstExprs,
2441                                        ArrayRef<const Expr *> AssignmentOps) {
2442   if (!CGF.HaveInsertPoint())
2443     return;
2444   assert(CopyprivateVars.size() == SrcExprs.size() &&
2445          CopyprivateVars.size() == DstExprs.size() &&
2446          CopyprivateVars.size() == AssignmentOps.size());
2447   ASTContext &C = CGM.getContext();
2448   // int32 did_it = 0;
2449   // if(__kmpc_single(ident_t *, gtid)) {
2450   //   SingleOpGen();
2451   //   __kmpc_end_single(ident_t *, gtid);
2452   //   did_it = 1;
2453   // }
2454   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2455   // <copy_func>, did_it);
2456 
2457   Address DidIt = Address::invalid();
2458   if (!CopyprivateVars.empty()) {
2459     // int32 did_it = 0;
2460     QualType KmpInt32Ty =
2461         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2462     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2463     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2464   }
2465   // Prepare arguments and build a call to __kmpc_single
2466   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2467   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2468                             CGM.getModule(), OMPRTL___kmpc_single),
2469                         Args,
2470                         OMPBuilder.getOrCreateRuntimeFunction(
2471                             CGM.getModule(), OMPRTL___kmpc_end_single),
2472                         Args,
2473                         /*Conditional=*/true);
2474   SingleOpGen.setAction(Action);
2475   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2476   if (DidIt.isValid()) {
2477     // did_it = 1;
2478     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2479   }
2480   Action.Done(CGF);
2481   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2482   // <copy_func>, did_it);
2483   if (DidIt.isValid()) {
2484     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2485     QualType CopyprivateArrayTy = C.getConstantArrayType(
2486         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2487         /*IndexTypeQuals=*/0);
2488     // Create a list of all private variables for copyprivate.
2489     Address CopyprivateList =
2490         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2491     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2492       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2493       CGF.Builder.CreateStore(
2494           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2495               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2496               CGF.VoidPtrTy),
2497           Elem);
2498     }
2499     // Build function that copies private values from single region to all other
2500     // threads in the corresponding parallel region.
2501     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2502         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2503         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2504     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2505     Address CL =
2506       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2507                                                       CGF.VoidPtrTy);
2508     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2509     llvm::Value *Args[] = {
2510         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2511         getThreadID(CGF, Loc),        // i32 <gtid>
2512         BufSize,                      // size_t <buf_size>
2513         CL.getPointer(),              // void *<copyprivate list>
2514         CpyFn,                        // void (*) (void *, void *) <copy_func>
2515         DidItVal                      // i32 did_it
2516     };
2517     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2518                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2519                         Args);
2520   }
2521 }
2522 
2523 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2524                                         const RegionCodeGenTy &OrderedOpGen,
2525                                         SourceLocation Loc, bool IsThreads) {
2526   if (!CGF.HaveInsertPoint())
2527     return;
2528   // __kmpc_ordered(ident_t *, gtid);
2529   // OrderedOpGen();
2530   // __kmpc_end_ordered(ident_t *, gtid);
2531   // Prepare arguments and build a call to __kmpc_ordered
2532   if (IsThreads) {
2533     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2534     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2535                               CGM.getModule(), OMPRTL___kmpc_ordered),
2536                           Args,
2537                           OMPBuilder.getOrCreateRuntimeFunction(
2538                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2539                           Args);
2540     OrderedOpGen.setAction(Action);
2541     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2542     return;
2543   }
2544   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2545 }
2546 
2547 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2548   unsigned Flags;
2549   if (Kind == OMPD_for)
2550     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2551   else if (Kind == OMPD_sections)
2552     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2553   else if (Kind == OMPD_single)
2554     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2555   else if (Kind == OMPD_barrier)
2556     Flags = OMP_IDENT_BARRIER_EXPL;
2557   else
2558     Flags = OMP_IDENT_BARRIER_IMPL;
2559   return Flags;
2560 }
2561 
2562 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2563     CodeGenFunction &CGF, const OMPLoopDirective &S,
2564     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2565   // Check if the loop directive is actually a doacross loop directive. In this
2566   // case choose static, 1 schedule.
2567   if (llvm::any_of(
2568           S.getClausesOfKind<OMPOrderedClause>(),
2569           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2570     ScheduleKind = OMPC_SCHEDULE_static;
2571     // Chunk size is 1 in this case.
2572     llvm::APInt ChunkSize(32, 1);
2573     ChunkExpr = IntegerLiteral::Create(
2574         CGF.getContext(), ChunkSize,
2575         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2576         SourceLocation());
2577   }
2578 }
2579 
2580 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2581                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2582                                       bool ForceSimpleCall) {
2583   // Check if we should use the OMPBuilder
2584   auto *OMPRegionInfo =
2585       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2586   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2587     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2588         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2589     return;
2590   }
2591 
2592   if (!CGF.HaveInsertPoint())
2593     return;
2594   // Build call __kmpc_cancel_barrier(loc, thread_id);
2595   // Build call __kmpc_barrier(loc, thread_id);
2596   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2597   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2598   // thread_id);
2599   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2600                          getThreadID(CGF, Loc)};
2601   if (OMPRegionInfo) {
2602     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2603       llvm::Value *Result = CGF.EmitRuntimeCall(
2604           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2605                                                 OMPRTL___kmpc_cancel_barrier),
2606           Args);
2607       if (EmitChecks) {
2608         // if (__kmpc_cancel_barrier()) {
2609         //   exit from construct;
2610         // }
2611         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2612         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2613         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2614         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2615         CGF.EmitBlock(ExitBB);
2616         //   exit from construct;
2617         CodeGenFunction::JumpDest CancelDestination =
2618             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2619         CGF.EmitBranchThroughCleanup(CancelDestination);
2620         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2621       }
2622       return;
2623     }
2624   }
2625   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2626                           CGM.getModule(), OMPRTL___kmpc_barrier),
2627                       Args);
2628 }
2629 
2630 /// Map the OpenMP loop schedule to the runtime enumeration.
2631 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2632                                           bool Chunked, bool Ordered) {
2633   switch (ScheduleKind) {
2634   case OMPC_SCHEDULE_static:
2635     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2636                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2637   case OMPC_SCHEDULE_dynamic:
2638     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2639   case OMPC_SCHEDULE_guided:
2640     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2641   case OMPC_SCHEDULE_runtime:
2642     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2643   case OMPC_SCHEDULE_auto:
2644     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2645   case OMPC_SCHEDULE_unknown:
2646     assert(!Chunked && "chunk was specified but schedule kind not known");
2647     return Ordered ? OMP_ord_static : OMP_sch_static;
2648   }
2649   llvm_unreachable("Unexpected runtime schedule");
2650 }
2651 
2652 /// Map the OpenMP distribute schedule to the runtime enumeration.
2653 static OpenMPSchedType
2654 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2655   // only static is allowed for dist_schedule
2656   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2657 }
2658 
2659 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2660                                          bool Chunked) const {
2661   OpenMPSchedType Schedule =
2662       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2663   return Schedule == OMP_sch_static;
2664 }
2665 
2666 bool CGOpenMPRuntime::isStaticNonchunked(
2667     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2668   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2669   return Schedule == OMP_dist_sch_static;
2670 }
2671 
2672 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2673                                       bool Chunked) const {
2674   OpenMPSchedType Schedule =
2675       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2676   return Schedule == OMP_sch_static_chunked;
2677 }
2678 
2679 bool CGOpenMPRuntime::isStaticChunked(
2680     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2681   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2682   return Schedule == OMP_dist_sch_static_chunked;
2683 }
2684 
2685 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2686   OpenMPSchedType Schedule =
2687       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2688   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2689   return Schedule != OMP_sch_static;
2690 }
2691 
2692 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2693                                   OpenMPScheduleClauseModifier M1,
2694                                   OpenMPScheduleClauseModifier M2) {
2695   int Modifier = 0;
2696   switch (M1) {
2697   case OMPC_SCHEDULE_MODIFIER_monotonic:
2698     Modifier = OMP_sch_modifier_monotonic;
2699     break;
2700   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2701     Modifier = OMP_sch_modifier_nonmonotonic;
2702     break;
2703   case OMPC_SCHEDULE_MODIFIER_simd:
2704     if (Schedule == OMP_sch_static_chunked)
2705       Schedule = OMP_sch_static_balanced_chunked;
2706     break;
2707   case OMPC_SCHEDULE_MODIFIER_last:
2708   case OMPC_SCHEDULE_MODIFIER_unknown:
2709     break;
2710   }
2711   switch (M2) {
2712   case OMPC_SCHEDULE_MODIFIER_monotonic:
2713     Modifier = OMP_sch_modifier_monotonic;
2714     break;
2715   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2716     Modifier = OMP_sch_modifier_nonmonotonic;
2717     break;
2718   case OMPC_SCHEDULE_MODIFIER_simd:
2719     if (Schedule == OMP_sch_static_chunked)
2720       Schedule = OMP_sch_static_balanced_chunked;
2721     break;
2722   case OMPC_SCHEDULE_MODIFIER_last:
2723   case OMPC_SCHEDULE_MODIFIER_unknown:
2724     break;
2725   }
2726   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2727   // If the static schedule kind is specified or if the ordered clause is
2728   // specified, and if the nonmonotonic modifier is not specified, the effect is
2729   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2730   // modifier is specified, the effect is as if the nonmonotonic modifier is
2731   // specified.
2732   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2733     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2734           Schedule == OMP_sch_static_balanced_chunked ||
2735           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2736           Schedule == OMP_dist_sch_static_chunked ||
2737           Schedule == OMP_dist_sch_static))
2738       Modifier = OMP_sch_modifier_nonmonotonic;
2739   }
2740   return Schedule | Modifier;
2741 }
2742 
2743 void CGOpenMPRuntime::emitForDispatchInit(
2744     CodeGenFunction &CGF, SourceLocation Loc,
2745     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2746     bool Ordered, const DispatchRTInput &DispatchValues) {
2747   if (!CGF.HaveInsertPoint())
2748     return;
2749   OpenMPSchedType Schedule = getRuntimeSchedule(
2750       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2751   assert(Ordered ||
2752          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2753           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2754           Schedule != OMP_sch_static_balanced_chunked));
2755   // Call __kmpc_dispatch_init(
2756   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2757   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2758   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2759 
2760   // If the Chunk was not specified in the clause - use default value 1.
2761   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2762                                             : CGF.Builder.getIntN(IVSize, 1);
2763   llvm::Value *Args[] = {
2764       emitUpdateLocation(CGF, Loc),
2765       getThreadID(CGF, Loc),
2766       CGF.Builder.getInt32(addMonoNonMonoModifier(
2767           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2768       DispatchValues.LB,                                     // Lower
2769       DispatchValues.UB,                                     // Upper
2770       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2771       Chunk                                                  // Chunk
2772   };
2773   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2774 }
2775 
2776 static void emitForStaticInitCall(
2777     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2778     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2779     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2780     const CGOpenMPRuntime::StaticRTInput &Values) {
2781   if (!CGF.HaveInsertPoint())
2782     return;
2783 
2784   assert(!Values.Ordered);
2785   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2786          Schedule == OMP_sch_static_balanced_chunked ||
2787          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2788          Schedule == OMP_dist_sch_static ||
2789          Schedule == OMP_dist_sch_static_chunked);
2790 
2791   // Call __kmpc_for_static_init(
2792   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2793   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2794   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2795   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2796   llvm::Value *Chunk = Values.Chunk;
2797   if (Chunk == nullptr) {
2798     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2799             Schedule == OMP_dist_sch_static) &&
2800            "expected static non-chunked schedule");
2801     // If the Chunk was not specified in the clause - use default value 1.
2802     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2803   } else {
2804     assert((Schedule == OMP_sch_static_chunked ||
2805             Schedule == OMP_sch_static_balanced_chunked ||
2806             Schedule == OMP_ord_static_chunked ||
2807             Schedule == OMP_dist_sch_static_chunked) &&
2808            "expected static chunked schedule");
2809   }
2810   llvm::Value *Args[] = {
2811       UpdateLocation,
2812       ThreadId,
2813       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2814                                                   M2)), // Schedule type
2815       Values.IL.getPointer(),                           // &isLastIter
2816       Values.LB.getPointer(),                           // &LB
2817       Values.UB.getPointer(),                           // &UB
2818       Values.ST.getPointer(),                           // &Stride
2819       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2820       Chunk                                             // Chunk
2821   };
2822   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2823 }
2824 
2825 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2826                                         SourceLocation Loc,
2827                                         OpenMPDirectiveKind DKind,
2828                                         const OpenMPScheduleTy &ScheduleKind,
2829                                         const StaticRTInput &Values) {
2830   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2831       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2832   assert(isOpenMPWorksharingDirective(DKind) &&
2833          "Expected loop-based or sections-based directive.");
2834   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2835                                              isOpenMPLoopDirective(DKind)
2836                                                  ? OMP_IDENT_WORK_LOOP
2837                                                  : OMP_IDENT_WORK_SECTIONS);
2838   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2839   llvm::FunctionCallee StaticInitFunction =
2840       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2841   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2842   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2843                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2844 }
2845 
2846 void CGOpenMPRuntime::emitDistributeStaticInit(
2847     CodeGenFunction &CGF, SourceLocation Loc,
2848     OpenMPDistScheduleClauseKind SchedKind,
2849     const CGOpenMPRuntime::StaticRTInput &Values) {
2850   OpenMPSchedType ScheduleNum =
2851       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2852   llvm::Value *UpdatedLocation =
2853       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2854   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2855   llvm::FunctionCallee StaticInitFunction;
2856   bool isGPUDistribute =
2857       CGM.getLangOpts().OpenMPIsDevice &&
2858       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2859   StaticInitFunction = createForStaticInitFunction(
2860       Values.IVSize, Values.IVSigned, isGPUDistribute);
2861 
2862   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2863                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2864                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2865 }
2866 
2867 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2868                                           SourceLocation Loc,
2869                                           OpenMPDirectiveKind DKind) {
2870   if (!CGF.HaveInsertPoint())
2871     return;
2872   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2873   llvm::Value *Args[] = {
2874       emitUpdateLocation(CGF, Loc,
2875                          isOpenMPDistributeDirective(DKind)
2876                              ? OMP_IDENT_WORK_DISTRIBUTE
2877                              : isOpenMPLoopDirective(DKind)
2878                                    ? OMP_IDENT_WORK_LOOP
2879                                    : OMP_IDENT_WORK_SECTIONS),
2880       getThreadID(CGF, Loc)};
2881   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2882   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2883       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2884     CGF.EmitRuntimeCall(
2885         OMPBuilder.getOrCreateRuntimeFunction(
2886             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2887         Args);
2888   else
2889     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2890                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2891                         Args);
2892 }
2893 
2894 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2895                                                  SourceLocation Loc,
2896                                                  unsigned IVSize,
2897                                                  bool IVSigned) {
2898   if (!CGF.HaveInsertPoint())
2899     return;
2900   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2901   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2902   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2903 }
2904 
2905 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2906                                           SourceLocation Loc, unsigned IVSize,
2907                                           bool IVSigned, Address IL,
2908                                           Address LB, Address UB,
2909                                           Address ST) {
2910   // Call __kmpc_dispatch_next(
2911   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2912   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2913   //          kmp_int[32|64] *p_stride);
2914   llvm::Value *Args[] = {
2915       emitUpdateLocation(CGF, Loc),
2916       getThreadID(CGF, Loc),
2917       IL.getPointer(), // &isLastIter
2918       LB.getPointer(), // &Lower
2919       UB.getPointer(), // &Upper
2920       ST.getPointer()  // &Stride
2921   };
2922   llvm::Value *Call =
2923       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2924   return CGF.EmitScalarConversion(
2925       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2926       CGF.getContext().BoolTy, Loc);
2927 }
2928 
2929 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2930                                            llvm::Value *NumThreads,
2931                                            SourceLocation Loc) {
2932   if (!CGF.HaveInsertPoint())
2933     return;
2934   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2935   llvm::Value *Args[] = {
2936       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2937       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2938   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2939                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2940                       Args);
2941 }
2942 
2943 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2944                                          ProcBindKind ProcBind,
2945                                          SourceLocation Loc) {
2946   if (!CGF.HaveInsertPoint())
2947     return;
2948   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2949   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2950   llvm::Value *Args[] = {
2951       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2952       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2953   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2954                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2955                       Args);
2956 }
2957 
2958 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2959                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2960   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2961     OMPBuilder.createFlush(CGF.Builder);
2962   } else {
2963     if (!CGF.HaveInsertPoint())
2964       return;
2965     // Build call void __kmpc_flush(ident_t *loc)
2966     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2967                             CGM.getModule(), OMPRTL___kmpc_flush),
2968                         emitUpdateLocation(CGF, Loc));
2969   }
2970 }
2971 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they number the fields
/// consecutively starting at zero, in declaration order. Reordering or
/// inserting enumerators therefore changes every following index.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2997 
2998 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2999   return OffloadEntriesTargetRegion.empty() &&
3000          OffloadEntriesDeviceGlobalVar.empty();
3001 }
3002 
3003 /// Initialize target region entry.
3004 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3005     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3006                                     StringRef ParentName, unsigned LineNum,
3007                                     unsigned Order) {
3008   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3009                                              "only required for the device "
3010                                              "code generation.");
3011   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3012       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3013                                    OMPTargetRegionEntryTargetRegion);
3014   ++OffloadingEntriesNum;
3015 }
3016 
3017 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3018     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3019                                   StringRef ParentName, unsigned LineNum,
3020                                   llvm::Constant *Addr, llvm::Constant *ID,
3021                                   OMPTargetRegionEntryKind Flags) {
3022   // If we are emitting code for a target, the entry is already initialized,
3023   // only has to be registered.
3024   if (CGM.getLangOpts().OpenMPIsDevice) {
3025     // This could happen if the device compilation is invoked standalone.
3026     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3027       return;
3028     auto &Entry =
3029         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3030     Entry.setAddress(Addr);
3031     Entry.setID(ID);
3032     Entry.setFlags(Flags);
3033   } else {
3034     if (Flags ==
3035             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3036         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3037                                  /*IgnoreAddressId*/ true))
3038       return;
3039     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3040            "Target region entry already registered!");
3041     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3042     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3043     ++OffloadingEntriesNum;
3044   }
3045 }
3046 
3047 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3048     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3049     bool IgnoreAddressId) const {
3050   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3051   if (PerDevice == OffloadEntriesTargetRegion.end())
3052     return false;
3053   auto PerFile = PerDevice->second.find(FileID);
3054   if (PerFile == PerDevice->second.end())
3055     return false;
3056   auto PerParentName = PerFile->second.find(ParentName);
3057   if (PerParentName == PerFile->second.end())
3058     return false;
3059   auto PerLine = PerParentName->second.find(LineNum);
3060   if (PerLine == PerParentName->second.end())
3061     return false;
3062   // Fail if this entry is already registered.
3063   if (!IgnoreAddressId &&
3064       (PerLine->second.getAddress() || PerLine->second.getID()))
3065     return false;
3066   return true;
3067 }
3068 
3069 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3070     const OffloadTargetRegionEntryInfoActTy &Action) {
3071   // Scan all target region entries and perform the provided action.
3072   for (const auto &D : OffloadEntriesTargetRegion)
3073     for (const auto &F : D.second)
3074       for (const auto &P : F.second)
3075         for (const auto &L : P.second)
3076           Action(D.first, F.first, P.first(), L.first, L.second);
3077 }
3078 
3079 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3080     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3081                                        OMPTargetGlobalVarEntryKind Flags,
3082                                        unsigned Order) {
3083   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3084                                              "only required for the device "
3085                                              "code generation.");
3086   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3087   ++OffloadingEntriesNum;
3088 }
3089 
3090 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3091     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3092                                      CharUnits VarSize,
3093                                      OMPTargetGlobalVarEntryKind Flags,
3094                                      llvm::GlobalValue::LinkageTypes Linkage) {
3095   if (CGM.getLangOpts().OpenMPIsDevice) {
3096     // This could happen if the device compilation is invoked standalone.
3097     if (!hasDeviceGlobalVarEntryInfo(VarName))
3098       return;
3099     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3100     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3101       if (Entry.getVarSize().isZero()) {
3102         Entry.setVarSize(VarSize);
3103         Entry.setLinkage(Linkage);
3104       }
3105       return;
3106     }
3107     Entry.setVarSize(VarSize);
3108     Entry.setLinkage(Linkage);
3109     Entry.setAddress(Addr);
3110   } else {
3111     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3112       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3113       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3114              "Entry not initialized!");
3115       if (Entry.getVarSize().isZero()) {
3116         Entry.setVarSize(VarSize);
3117         Entry.setLinkage(Linkage);
3118       }
3119       return;
3120     }
3121     OffloadEntriesDeviceGlobalVar.try_emplace(
3122         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3123     ++OffloadingEntriesNum;
3124   }
3125 }
3126 
3127 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3128     actOnDeviceGlobalVarEntriesInfo(
3129         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3130   // Scan all target region entries and perform the provided action.
3131   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3132     Action(E.getKey(), E.getValue());
3133 }
3134 
3135 void CGOpenMPRuntime::createOffloadEntry(
3136     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3137     llvm::GlobalValue::LinkageTypes Linkage) {
3138   StringRef Name = Addr->getName();
3139   llvm::Module &M = CGM.getModule();
3140   llvm::LLVMContext &C = M.getContext();
3141 
3142   // Create constant string with the name.
3143   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3144 
3145   std::string StringName = getName({"omp_offloading", "entry_name"});
3146   auto *Str = new llvm::GlobalVariable(
3147       M, StrPtrInit->getType(), /*isConstant=*/true,
3148       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3149   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3150 
3151   llvm::Constant *Data[] = {
3152       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3153       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3154       llvm::ConstantInt::get(CGM.SizeTy, Size),
3155       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3156       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3157   std::string EntryName = getName({"omp_offloading", "entry", ""});
3158   llvm::GlobalVariable *Entry = createGlobalStruct(
3159       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3160       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3161 
3162   // The entry has to be created in the section the linker expects it to be.
3163   Entry->setSection("omp_offloading_entries");
3164 }
3165 
3166 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3167   // Emit the offloading entries and metadata so that the device codegen side
3168   // can easily figure out what to emit. The produced metadata looks like
3169   // this:
3170   //
3171   // !omp_offload.info = !{!1, ...}
3172   //
3173   // Right now we only generate metadata for function that contain target
3174   // regions.
3175 
3176   // If we are in simd mode or there are no entries, we don't need to do
3177   // anything.
3178   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3179     return;
3180 
3181   llvm::Module &M = CGM.getModule();
3182   llvm::LLVMContext &C = M.getContext();
3183   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3184                          SourceLocation, StringRef>,
3185               16>
3186       OrderedEntries(OffloadEntriesInfoManager.size());
3187   llvm::SmallVector<StringRef, 16> ParentFunctions(
3188       OffloadEntriesInfoManager.size());
3189 
3190   // Auxiliary methods to create metadata values and strings.
3191   auto &&GetMDInt = [this](unsigned V) {
3192     return llvm::ConstantAsMetadata::get(
3193         llvm::ConstantInt::get(CGM.Int32Ty, V));
3194   };
3195 
3196   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3197 
3198   // Create the offloading info metadata node.
3199   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3200 
3201   // Create function that emits metadata for each target region entry;
3202   auto &&TargetRegionMetadataEmitter =
3203       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3204        &GetMDString](
3205           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3206           unsigned Line,
3207           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3208         // Generate metadata for target regions. Each entry of this metadata
3209         // contains:
3210         // - Entry 0 -> Kind of this type of metadata (0).
3211         // - Entry 1 -> Device ID of the file where the entry was identified.
3212         // - Entry 2 -> File ID of the file where the entry was identified.
3213         // - Entry 3 -> Mangled name of the function where the entry was
3214         // identified.
3215         // - Entry 4 -> Line in the file where the entry was identified.
3216         // - Entry 5 -> Order the entry was created.
3217         // The first element of the metadata node is the kind.
3218         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3219                                  GetMDInt(FileID),      GetMDString(ParentName),
3220                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3221 
3222         SourceLocation Loc;
3223         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3224                   E = CGM.getContext().getSourceManager().fileinfo_end();
3225              I != E; ++I) {
3226           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3227               I->getFirst()->getUniqueID().getFile() == FileID) {
3228             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3229                 I->getFirst(), Line, 1);
3230             break;
3231           }
3232         }
3233         // Save this entry in the right position of the ordered entries array.
3234         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3235         ParentFunctions[E.getOrder()] = ParentName;
3236 
3237         // Add metadata to the named metadata node.
3238         MD->addOperand(llvm::MDNode::get(C, Ops));
3239       };
3240 
3241   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3242       TargetRegionMetadataEmitter);
3243 
3244   // Create function that emits metadata for each device global variable entry;
3245   auto &&DeviceGlobalVarMetadataEmitter =
3246       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3247        MD](StringRef MangledName,
3248            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3249                &E) {
3250         // Generate metadata for global variables. Each entry of this metadata
3251         // contains:
3252         // - Entry 0 -> Kind of this type of metadata (1).
3253         // - Entry 1 -> Mangled name of the variable.
3254         // - Entry 2 -> Declare target kind.
3255         // - Entry 3 -> Order the entry was created.
3256         // The first element of the metadata node is the kind.
3257         llvm::Metadata *Ops[] = {
3258             GetMDInt(E.getKind()), GetMDString(MangledName),
3259             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3260 
3261         // Save this entry in the right position of the ordered entries array.
3262         OrderedEntries[E.getOrder()] =
3263             std::make_tuple(&E, SourceLocation(), MangledName);
3264 
3265         // Add metadata to the named metadata node.
3266         MD->addOperand(llvm::MDNode::get(C, Ops));
3267       };
3268 
3269   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3270       DeviceGlobalVarMetadataEmitter);
3271 
3272   for (const auto &E : OrderedEntries) {
3273     assert(std::get<0>(E) && "All ordered entries must exist!");
3274     if (const auto *CE =
3275             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3276                 std::get<0>(E))) {
3277       if (!CE->getID() || !CE->getAddress()) {
3278         // Do not blame the entry if the parent funtion is not emitted.
3279         StringRef FnName = ParentFunctions[CE->getOrder()];
3280         if (!CGM.GetGlobalValue(FnName))
3281           continue;
3282         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3283             DiagnosticsEngine::Error,
3284             "Offloading entry for target region in %0 is incorrect: either the "
3285             "address or the ID is invalid.");
3286         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3287         continue;
3288       }
3289       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3290                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3291     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3292                                              OffloadEntryInfoDeviceGlobalVar>(
3293                    std::get<0>(E))) {
3294       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3295           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3296               CE->getFlags());
3297       switch (Flags) {
3298       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3299         if (CGM.getLangOpts().OpenMPIsDevice &&
3300             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3301           continue;
3302         if (!CE->getAddress()) {
3303           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3304               DiagnosticsEngine::Error, "Offloading entry for declare target "
3305                                         "variable %0 is incorrect: the "
3306                                         "address is invalid.");
3307           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3308           continue;
3309         }
3310         // The vaiable has no definition - no need to add the entry.
3311         if (CE->getVarSize().isZero())
3312           continue;
3313         break;
3314       }
3315       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3316         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3317                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3318                "Declaret target link address is set.");
3319         if (CGM.getLangOpts().OpenMPIsDevice)
3320           continue;
3321         if (!CE->getAddress()) {
3322           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3323               DiagnosticsEngine::Error,
3324               "Offloading entry for declare target variable is incorrect: the "
3325               "address is invalid.");
3326           CGM.getDiags().Report(DiagID);
3327           continue;
3328         }
3329         break;
3330       }
3331       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3332                          CE->getVarSize().getQuantity(), Flags,
3333                          CE->getLinkage());
3334     } else {
3335       llvm_unreachable("Unsupported entry kind.");
3336     }
3337   }
3338 }
3339 
3340 /// Loads all the offload entries information from the host IR
3341 /// metadata.
3342 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3343   // If we are in target mode, load the metadata from the host IR. This code has
3344   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3345 
3346   if (!CGM.getLangOpts().OpenMPIsDevice)
3347     return;
3348 
3349   if (CGM.getLangOpts().OMPHostIRFile.empty())
3350     return;
3351 
3352   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3353   if (auto EC = Buf.getError()) {
3354     CGM.getDiags().Report(diag::err_cannot_open_file)
3355         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3356     return;
3357   }
3358 
3359   llvm::LLVMContext C;
3360   auto ME = expectedToErrorOrAndEmitErrors(
3361       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3362 
3363   if (auto EC = ME.getError()) {
3364     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3365         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3366     CGM.getDiags().Report(DiagID)
3367         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3368     return;
3369   }
3370 
3371   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3372   if (!MD)
3373     return;
3374 
3375   for (llvm::MDNode *MN : MD->operands()) {
3376     auto &&GetMDInt = [MN](unsigned Idx) {
3377       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3378       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3379     };
3380 
3381     auto &&GetMDString = [MN](unsigned Idx) {
3382       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3383       return V->getString();
3384     };
3385 
3386     switch (GetMDInt(0)) {
3387     default:
3388       llvm_unreachable("Unexpected metadata!");
3389       break;
3390     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3391         OffloadingEntryInfoTargetRegion:
3392       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3393           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3394           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3395           /*Order=*/GetMDInt(5));
3396       break;
3397     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3398         OffloadingEntryInfoDeviceGlobalVar:
3399       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3400           /*MangledName=*/GetMDString(1),
3401           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3402               /*Flags=*/GetMDInt(2)),
3403           /*Order=*/GetMDInt(3));
3404       break;
3405     }
3406   }
3407 }
3408 
3409 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3410   if (!KmpRoutineEntryPtrTy) {
3411     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3412     ASTContext &C = CGM.getContext();
3413     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3414     FunctionProtoType::ExtProtoInfo EPI;
3415     KmpRoutineEntryPtrQTy = C.getPointerType(
3416         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3417     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3418   }
3419 }
3420 
3421 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3422   // Make sure the type of the entry is already created. This is the type we
3423   // have to create:
3424   // struct __tgt_offload_entry{
3425   //   void      *addr;       // Pointer to the offload entry info.
3426   //                          // (function or global)
3427   //   char      *name;       // Name of the function or global.
3428   //   size_t     size;       // Size of the entry info (0 if it a function).
3429   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3430   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3431   // };
3432   if (TgtOffloadEntryQTy.isNull()) {
3433     ASTContext &C = CGM.getContext();
3434     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3435     RD->startDefinition();
3436     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3437     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3438     addFieldToRecordDecl(C, RD, C.getSizeType());
3439     addFieldToRecordDecl(
3440         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3441     addFieldToRecordDecl(
3442         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3443     RD->completeDefinition();
3444     RD->addAttr(PackedAttr::CreateImplicit(C));
3445     TgtOffloadEntryQTy = C.getRecordType(RD);
3446   }
3447   return TgtOffloadEntryQTy;
3448 }
3449 
3450 namespace {
3451 struct PrivateHelpersTy {
3452   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3453                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3454       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3455         PrivateElemInit(PrivateElemInit) {}
3456   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3457   const Expr *OriginalRef = nullptr;
3458   const VarDecl *Original = nullptr;
3459   const VarDecl *PrivateCopy = nullptr;
3460   const VarDecl *PrivateElemInit = nullptr;
3461   bool isLocalPrivate() const {
3462     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3463   }
3464 };
3465 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3466 } // anonymous namespace
3467 
3468 static bool isAllocatableDecl(const VarDecl *VD) {
3469   const VarDecl *CVD = VD->getCanonicalDecl();
3470   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3471     return false;
3472   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3473   // Use the default allocation.
3474   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3475             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3476            !AA->getAllocator());
3477 }
3478 
3479 static RecordDecl *
3480 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3481   if (!Privates.empty()) {
3482     ASTContext &C = CGM.getContext();
3483     // Build struct .kmp_privates_t. {
3484     //         /*  private vars  */
3485     //       };
3486     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3487     RD->startDefinition();
3488     for (const auto &Pair : Privates) {
3489       const VarDecl *VD = Pair.second.Original;
3490       QualType Type = VD->getType().getNonReferenceType();
3491       // If the private variable is a local variable with lvalue ref type,
3492       // allocate the pointer instead of the pointee type.
3493       if (Pair.second.isLocalPrivate()) {
3494         if (VD->getType()->isLValueReferenceType())
3495           Type = C.getPointerType(Type);
3496         if (isAllocatableDecl(VD))
3497           Type = C.getPointerType(Type);
3498       }
3499       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3500       if (VD->hasAttrs()) {
3501         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3502              E(VD->getAttrs().end());
3503              I != E; ++I)
3504           FD->addAttr(*I);
3505       }
3506     }
3507     RD->completeDefinition();
3508     return RD;
3509   }
3510   return nullptr;
3511 }
3512 
3513 static RecordDecl *
3514 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3515                          QualType KmpInt32Ty,
3516                          QualType KmpRoutineEntryPointerQTy) {
3517   ASTContext &C = CGM.getContext();
3518   // Build struct kmp_task_t {
3519   //         void *              shareds;
3520   //         kmp_routine_entry_t routine;
3521   //         kmp_int32           part_id;
3522   //         kmp_cmplrdata_t data1;
3523   //         kmp_cmplrdata_t data2;
3524   // For taskloops additional fields:
3525   //         kmp_uint64          lb;
3526   //         kmp_uint64          ub;
3527   //         kmp_int64           st;
3528   //         kmp_int32           liter;
3529   //         void *              reductions;
3530   //       };
3531   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3532   UD->startDefinition();
3533   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3534   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3535   UD->completeDefinition();
3536   QualType KmpCmplrdataTy = C.getRecordType(UD);
3537   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3538   RD->startDefinition();
3539   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3540   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3541   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3542   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3543   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3544   if (isOpenMPTaskLoopDirective(Kind)) {
3545     QualType KmpUInt64Ty =
3546         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3547     QualType KmpInt64Ty =
3548         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3549     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3550     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3551     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3552     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3553     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3554   }
3555   RD->completeDefinition();
3556   return RD;
3557 }
3558 
3559 static RecordDecl *
3560 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3561                                      ArrayRef<PrivateDataTy> Privates) {
3562   ASTContext &C = CGM.getContext();
3563   // Build struct kmp_task_t_with_privates {
3564   //         kmp_task_t task_data;
3565   //         .kmp_privates_t. privates;
3566   //       };
3567   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3568   RD->startDefinition();
3569   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3570   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3571     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3572   RD->completeDefinition();
3573   return RD;
3574 }
3575 
3576 /// Emit a proxy function which accepts kmp_task_t as the second
3577 /// argument.
3578 /// \code
3579 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3580 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3581 ///   For taskloops:
3582 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3583 ///   tt->reductions, tt->shareds);
3584 ///   return 0;
3585 /// }
3586 /// \endcode
3587 static llvm::Function *
3588 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3589                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3590                       QualType KmpTaskTWithPrivatesPtrQTy,
3591                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3592                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3593                       llvm::Value *TaskPrivatesMap) {
3594   ASTContext &C = CGM.getContext();
3595   FunctionArgList Args;
3596   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3597                             ImplicitParamDecl::Other);
3598   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3599                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3600                                 ImplicitParamDecl::Other);
3601   Args.push_back(&GtidArg);
3602   Args.push_back(&TaskTypeArg);
3603   const auto &TaskEntryFnInfo =
3604       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3605   llvm::FunctionType *TaskEntryTy =
3606       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3607   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3608   auto *TaskEntry = llvm::Function::Create(
3609       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3610   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3611   TaskEntry->setDoesNotRecurse();
3612   CodeGenFunction CGF(CGM);
3613   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3614                     Loc, Loc);
3615 
3616   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3617   // tt,
3618   // For taskloops:
3619   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3620   // tt->task_data.shareds);
3621   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3622       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3623   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3624       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3625       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3626   const auto *KmpTaskTWithPrivatesQTyRD =
3627       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3628   LValue Base =
3629       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3630   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3631   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3632   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3633   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3634 
3635   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3636   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3637   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3638       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3639       CGF.ConvertTypeForMem(SharedsPtrTy));
3640 
3641   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3642   llvm::Value *PrivatesParam;
3643   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3644     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3645     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3646         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3647   } else {
3648     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3649   }
3650 
3651   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3652                                TaskPrivatesMap,
3653                                CGF.Builder
3654                                    .CreatePointerBitCastOrAddrSpaceCast(
3655                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3656                                    .getPointer()};
3657   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3658                                           std::end(CommonArgs));
3659   if (isOpenMPTaskLoopDirective(Kind)) {
3660     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3661     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3662     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3663     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3664     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3665     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3666     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3667     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3668     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3669     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3670     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3671     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3672     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3673     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3674     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3675     CallArgs.push_back(LBParam);
3676     CallArgs.push_back(UBParam);
3677     CallArgs.push_back(StParam);
3678     CallArgs.push_back(LIParam);
3679     CallArgs.push_back(RParam);
3680   }
3681   CallArgs.push_back(SharedsParam);
3682 
3683   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3684                                                   CallArgs);
3685   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3686                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3687   CGF.FinishFunction();
3688   return TaskEntry;
3689 }
3690 
3691 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3692                                             SourceLocation Loc,
3693                                             QualType KmpInt32Ty,
3694                                             QualType KmpTaskTWithPrivatesPtrQTy,
3695                                             QualType KmpTaskTWithPrivatesQTy) {
3696   ASTContext &C = CGM.getContext();
3697   FunctionArgList Args;
3698   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3699                             ImplicitParamDecl::Other);
3700   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3701                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3702                                 ImplicitParamDecl::Other);
3703   Args.push_back(&GtidArg);
3704   Args.push_back(&TaskTypeArg);
3705   const auto &DestructorFnInfo =
3706       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3707   llvm::FunctionType *DestructorFnTy =
3708       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3709   std::string Name =
3710       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3711   auto *DestructorFn =
3712       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3713                              Name, &CGM.getModule());
3714   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3715                                     DestructorFnInfo);
3716   DestructorFn->setDoesNotRecurse();
3717   CodeGenFunction CGF(CGM);
3718   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3719                     Args, Loc, Loc);
3720 
3721   LValue Base = CGF.EmitLoadOfPointerLValue(
3722       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3723       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3724   const auto *KmpTaskTWithPrivatesQTyRD =
3725       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3726   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3727   Base = CGF.EmitLValueForField(Base, *FI);
3728   for (const auto *Field :
3729        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3730     if (QualType::DestructionKind DtorKind =
3731             Field->getType().isDestructedType()) {
3732       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3733       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3734     }
3735   }
3736   CGF.FinishFunction();
3737   return DestructorFn;
3738 }
3739 
3740 /// Emit a privates mapping function for correct handling of private and
3741 /// firstprivate variables.
3742 /// \code
3743 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3744 /// **noalias priv1,...,  <tyn> **noalias privn) {
3745 ///   *priv1 = &.privates.priv1;
3746 ///   ...;
3747 ///   *privn = &.privates.privn;
3748 /// }
3749 /// \endcode
3750 static llvm::Value *
3751 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3752                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3753                                ArrayRef<PrivateDataTy> Privates) {
3754   ASTContext &C = CGM.getContext();
3755   FunctionArgList Args;
3756   ImplicitParamDecl TaskPrivatesArg(
3757       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3758       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3759       ImplicitParamDecl::Other);
3760   Args.push_back(&TaskPrivatesArg);
3761   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3762   unsigned Counter = 1;
3763   for (const Expr *E : Data.PrivateVars) {
3764     Args.push_back(ImplicitParamDecl::Create(
3765         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3766         C.getPointerType(C.getPointerType(E->getType()))
3767             .withConst()
3768             .withRestrict(),
3769         ImplicitParamDecl::Other));
3770     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3771     PrivateVarsPos[VD] = Counter;
3772     ++Counter;
3773   }
3774   for (const Expr *E : Data.FirstprivateVars) {
3775     Args.push_back(ImplicitParamDecl::Create(
3776         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3777         C.getPointerType(C.getPointerType(E->getType()))
3778             .withConst()
3779             .withRestrict(),
3780         ImplicitParamDecl::Other));
3781     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3782     PrivateVarsPos[VD] = Counter;
3783     ++Counter;
3784   }
3785   for (const Expr *E : Data.LastprivateVars) {
3786     Args.push_back(ImplicitParamDecl::Create(
3787         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3788         C.getPointerType(C.getPointerType(E->getType()))
3789             .withConst()
3790             .withRestrict(),
3791         ImplicitParamDecl::Other));
3792     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3793     PrivateVarsPos[VD] = Counter;
3794     ++Counter;
3795   }
3796   for (const VarDecl *VD : Data.PrivateLocals) {
3797     QualType Ty = VD->getType().getNonReferenceType();
3798     if (VD->getType()->isLValueReferenceType())
3799       Ty = C.getPointerType(Ty);
3800     if (isAllocatableDecl(VD))
3801       Ty = C.getPointerType(Ty);
3802     Args.push_back(ImplicitParamDecl::Create(
3803         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3804         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3805         ImplicitParamDecl::Other));
3806     PrivateVarsPos[VD] = Counter;
3807     ++Counter;
3808   }
3809   const auto &TaskPrivatesMapFnInfo =
3810       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3811   llvm::FunctionType *TaskPrivatesMapTy =
3812       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3813   std::string Name =
3814       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3815   auto *TaskPrivatesMap = llvm::Function::Create(
3816       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3817       &CGM.getModule());
3818   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3819                                     TaskPrivatesMapFnInfo);
3820   if (CGM.getLangOpts().Optimize) {
3821     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3822     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3823     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3824   }
3825   CodeGenFunction CGF(CGM);
3826   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3827                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3828 
3829   // *privi = &.privates.privi;
3830   LValue Base = CGF.EmitLoadOfPointerLValue(
3831       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3832       TaskPrivatesArg.getType()->castAs<PointerType>());
3833   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3834   Counter = 0;
3835   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3836     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3837     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3838     LValue RefLVal =
3839         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3840     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3841         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3842     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3843     ++Counter;
3844   }
3845   CGF.FinishFunction();
3846   return TaskPrivatesMap;
3847 }
3848 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lock-step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, accessed through \p TDBase)
/// and emits the initializer of each private copy into its field. Private
/// locals are skipped.
///
/// \param KmpTaskSharedsPtr Address of the shareds block; it is only used to
/// form the source lvalue when the conditions checked below hold.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type \p KmpTaskSharedsPtr is cast to.
/// \param Data Task data; only its FirstprivateVars list is consulted here.
/// \param ForDup When true, only initializers that are non-trivial
/// CXXConstructExprs are emitted, and firstprivate sources are read from the
/// shareds block.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first designates the privates field of the task record itself.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement that matches the kind of the directive.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record; it is
  // advanced once per element of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Without ForDup every initializer is emitted; with ForDup only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A set PrivateElemInit means the initializer refers to a source
      // object, whose lvalue has to be found first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source from the shareds block and rebuild the lvalue
          // with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda captures and variables used inside a block are emitted
          // directly from the original reference.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array case: map the helper variable to the source object and
          // emit the initializer straight into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No source object involved: just emit the initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3970 
3971 /// Check if duplication function is required for taskloops.
3972 static bool checkInitIsRequired(CodeGenFunction &CGF,
3973                                 ArrayRef<PrivateDataTy> Privates) {
3974   bool InitRequired = false;
3975   for (const PrivateDataTy &Pair : Privates) {
3976     if (Pair.second.isLocalPrivate())
3977       continue;
3978     const VarDecl *VD = Pair.second.PrivateCopy;
3979     const Expr *Init = VD->getAnyInitializer();
3980     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3981                                     !CGF.isTrivialInitializer(Init));
3982     if (InitRequired)
3983       break;
3984   }
3985   return InitRequired;
3986 }
3987 
3988 
3989 /// Emit task_dup function (for initialization of
3990 /// private/firstprivate/lastprivate vars and last_iter flag)
3991 /// \code
3992 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3993 /// lastpriv) {
3994 /// // setup lastprivate flag
3995 ///    task_dst->last = lastpriv;
3996 /// // could be constructor calls here...
3997 /// }
3998 /// \endcode
3999 static llvm::Value *
4000 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4001                     const OMPExecutableDirective &D,
4002                     QualType KmpTaskTWithPrivatesPtrQTy,
4003                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4004                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4005                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4006                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4007   ASTContext &C = CGM.getContext();
4008   FunctionArgList Args;
4009   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4010                            KmpTaskTWithPrivatesPtrQTy,
4011                            ImplicitParamDecl::Other);
4012   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4013                            KmpTaskTWithPrivatesPtrQTy,
4014                            ImplicitParamDecl::Other);
4015   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4016                                 ImplicitParamDecl::Other);
4017   Args.push_back(&DstArg);
4018   Args.push_back(&SrcArg);
4019   Args.push_back(&LastprivArg);
4020   const auto &TaskDupFnInfo =
4021       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4022   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4023   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4024   auto *TaskDup = llvm::Function::Create(
4025       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4026   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4027   TaskDup->setDoesNotRecurse();
4028   CodeGenFunction CGF(CGM);
4029   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4030                     Loc);
4031 
4032   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4033       CGF.GetAddrOfLocalVar(&DstArg),
4034       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4035   // task_dst->liter = lastpriv;
4036   if (WithLastIter) {
4037     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4038     LValue Base = CGF.EmitLValueForField(
4039         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4040     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4041     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4042         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4043     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4044   }
4045 
4046   // Emit initial values for private copies (if any).
4047   assert(!Privates.empty());
4048   Address KmpTaskSharedsPtr = Address::invalid();
4049   if (!Data.FirstprivateVars.empty()) {
4050     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4051         CGF.GetAddrOfLocalVar(&SrcArg),
4052         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4053     LValue Base = CGF.EmitLValueForField(
4054         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4055     KmpTaskSharedsPtr = Address(
4056         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4057                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4058                                                   KmpTaskTShareds)),
4059                              Loc),
4060         CGM.getNaturalTypeAlignment(SharedsTy));
4061   }
4062   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4063                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4064   CGF.FinishFunction();
4065   return TaskDup;
4066 }
4067 
4068 /// Checks if destructor function is required to be generated.
4069 /// \return true if cleanups are required, false otherwise.
4070 static bool
4071 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4072                          ArrayRef<PrivateDataTy> Privates) {
4073   for (const PrivateDataTy &P : Privates) {
4074     if (P.second.isLocalPrivate())
4075       continue;
4076     QualType Ty = P.second.Original->getType().getNonReferenceType();
4077     if (Ty.isDestructedType())
4078       return true;
4079   }
4080   return false;
4081 }
4082 
4083 namespace {
4084 /// Loop generator for OpenMP iterator expression.
4085 class OMPIteratorGeneratorScope final
4086     : public CodeGenFunction::OMPPrivateScope {
4087   CodeGenFunction &CGF;
4088   const OMPIteratorExpr *E = nullptr;
4089   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4090   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4091   OMPIteratorGeneratorScope() = delete;
4092   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4093 
4094 public:
4095   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4096       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4097     if (!E)
4098       return;
4099     SmallVector<llvm::Value *, 4> Uppers;
4100     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4101       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4102       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4103       addPrivate(VD, [&CGF, VD]() {
4104         return CGF.CreateMemTemp(VD->getType(), VD->getName());
4105       });
4106       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4107       addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4108         return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4109                                  "counter.addr");
4110       });
4111     }
4112     Privatize();
4113 
4114     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4115       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4116       LValue CLVal =
4117           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4118                              HelperData.CounterVD->getType());
4119       // Counter = 0;
4120       CGF.EmitStoreOfScalar(
4121           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4122           CLVal);
4123       CodeGenFunction::JumpDest &ContDest =
4124           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4125       CodeGenFunction::JumpDest &ExitDest =
4126           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4127       // N = <number-of_iterations>;
4128       llvm::Value *N = Uppers[I];
4129       // cont:
4130       // if (Counter < N) goto body; else goto exit;
4131       CGF.EmitBlock(ContDest.getBlock());
4132       auto *CVal =
4133           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4134       llvm::Value *Cmp =
4135           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4136               ? CGF.Builder.CreateICmpSLT(CVal, N)
4137               : CGF.Builder.CreateICmpULT(CVal, N);
4138       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4139       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4140       // body:
4141       CGF.EmitBlock(BodyBB);
4142       // Iteri = Begini + Counter * Stepi;
4143       CGF.EmitIgnoredExpr(HelperData.Update);
4144     }
4145   }
4146   ~OMPIteratorGeneratorScope() {
4147     if (!E)
4148       return;
4149     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4150       // Counter = Counter + 1;
4151       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4152       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4153       // goto cont;
4154       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4155       // exit:
4156       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4157     }
4158   }
4159 };
4160 } // namespace
4161 
4162 static std::pair<llvm::Value *, llvm::Value *>
4163 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4164   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4165   llvm::Value *Addr;
4166   if (OASE) {
4167     const Expr *Base = OASE->getBase();
4168     Addr = CGF.EmitScalarExpr(Base);
4169   } else {
4170     Addr = CGF.EmitLValue(E).getPointer(CGF);
4171   }
4172   llvm::Value *SizeVal;
4173   QualType Ty = E->getType();
4174   if (OASE) {
4175     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4176     for (const Expr *SE : OASE->getDimensions()) {
4177       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4178       Sz = CGF.EmitScalarConversion(
4179           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4180       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4181     }
4182   } else if (const auto *ASE =
4183                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4184     LValue UpAddrLVal =
4185         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4186     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4187     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4188         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4189     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4190     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4191     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4192   } else {
4193     SizeVal = CGF.getTypeSize(Ty);
4194   }
4195   return std::make_pair(Addr, SizeVal);
4196 }
4197 
4198 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4199 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4200   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4201   if (KmpTaskAffinityInfoTy.isNull()) {
4202     RecordDecl *KmpAffinityInfoRD =
4203         C.buildImplicitRecord("kmp_task_affinity_info_t");
4204     KmpAffinityInfoRD->startDefinition();
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4206     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4207     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4208     KmpAffinityInfoRD->completeDefinition();
4209     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4210   }
4211 }
4212 
/// Allocates and initializes the runtime task object for directive \p D.
///
/// The emitted sequence is:
///  - collect the private, firstprivate and lastprivate copies plus the
///    private locals and order them by decreasing alignment;
///  - build (or reuse the cached) kmp_task_t type and the per-task record
///    that extends it with the privates;
///  - emit the privates mapping function (if there are privates) and the
///    proxy task entry;
///  - compute the task flags and call __kmpc_omp_task_alloc, or
///    __kmpc_omp_target_task_alloc when the directive has a 'nowait' clause;
///  - process 'detach' and 'affinity' clauses;
///  - copy the shareds, initialize the privates and, for taskloop directives
///    that need it, emit the task duplication function;
///  - store the destructors function and the priority in the task data.
///
/// \return The allocated task, the proxy task entry, the task pointer cast to
/// the per-task record type, the lvalue of the embedded kmp_task_t, its
/// record declaration and, when generated, the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the variable used for element-wise
  // initialization.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable private locals are recorded with pointer alignment instead of
  // the alignment of their declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Larger alignments come first; the stable sort keeps the insertion order
  // among privates with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached
  // record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates mapping function is cast to the type of the task function's
  // argument with index 3; without privates a null pointer of that type is
  // used instead.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a run-time condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, the final flag is selected at run time;
  // otherwise it is taken from the compile-time bit.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target task allocation entry point is used; it
  // receives the device ID as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: 'Loc' below hides the function parameter and refers to the
    // location of the detach clause.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the list items of clauses without an iterator;
    // NumOfElements holds the run-time count contributed by iterators.
    // NOTE(review): the upper bounds of all iterators of all iterator clauses
    // are multiplied together and the number of list items in those clauses
    // is not factored in - confirm this is the intended element count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    // Note: the 'Flags' enumerator hides the local variable 'Flags' for the
    // rest of this block.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // At least one iterator clause: the element count is only known at run
      // time, so the array is emitted as a variable-length array.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // No iterators: the element count is a compile-time constant.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // For iterator clauses the write position is kept in a temporary so that
    // the emitted iterator loops can advance it; it starts right after the
    // statically filled elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope opens the iterator loops here and closes them when it is
      // destroyed at the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the per-task record; 'Base' designates the
  // whole record and 'TDBase' its first field, the embedded kmp_task_t.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is generated only for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  // The destructors pointer is stored through the Data1 field and the
  // priority through the Data2 field; the union declaration obtained from
  // Data1's type is used for the member lookup in both cases.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4603 
namespace {
/// Dependence kinds in the encoding passed to the runtime library.
enum RTLDependenceKindTy {
  /// 'in' dependence.
  DepIn = 0x1,
  /// 'out' and 'inout' dependences (they share one code).
  DepInOut = 0x3,
  /// 'mutexinoutset' dependence.
  DepMutexInOutSet = 0x4,
};

/// Indices of the fields in the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
4614 
4615 /// Translates internal dependency kind into the runtime kind.
4616 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4617   RTLDependenceKindTy DepKind;
4618   switch (K) {
4619   case OMPC_DEPEND_in:
4620     DepKind = DepIn;
4621     break;
4622   // Out and InOut dependencies must use the same code.
4623   case OMPC_DEPEND_out:
4624   case OMPC_DEPEND_inout:
4625     DepKind = DepInOut;
4626     break;
4627   case OMPC_DEPEND_mutexinoutset:
4628     DepKind = DepMutexInOutSet;
4629     break;
4630   case OMPC_DEPEND_source:
4631   case OMPC_DEPEND_sink:
4632   case OMPC_DEPEND_depobj:
4633   case OMPC_DEPEND_unknown:
4634     llvm_unreachable("Unknown task dependence type");
4635   }
4636   return DepKind;
4637 }
4638 
4639 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4640 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4641                            QualType &FlagsTy) {
4642   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4643   if (KmpDependInfoTy.isNull()) {
4644     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4645     KmpDependInfoRD->startDefinition();
4646     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4647     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4648     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4649     KmpDependInfoRD->completeDefinition();
4650     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4651   }
4652 }
4653 
4654 std::pair<llvm::Value *, LValue>
4655 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4656                                    SourceLocation Loc) {
4657   ASTContext &C = CGM.getContext();
4658   QualType FlagsTy;
4659   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4660   RecordDecl *KmpDependInfoRD =
4661       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4662   LValue Base = CGF.EmitLoadOfPointerLValue(
4663       DepobjLVal.getAddress(CGF),
4664       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4665   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4666   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4667           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4668   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4669                             Base.getTBAAInfo());
4670   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4671       Addr.getElementType(), Addr.getPointer(),
4672       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4673   LValue NumDepsBase = CGF.MakeAddrLValue(
4674       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4675       Base.getBaseInfo(), Base.getTBAAInfo());
4676   // NumDeps = deps[i].base_addr;
4677   LValue BaseAddrLVal = CGF.EmitLValueForField(
4678       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4679   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4680   return std::make_pair(NumDeps, Base);
4681 }
4682 
4683 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4684                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4685                            const OMPTaskDataTy::DependData &Data,
4686                            Address DependenciesArray) {
4687   CodeGenModule &CGM = CGF.CGM;
4688   ASTContext &C = CGM.getContext();
4689   QualType FlagsTy;
4690   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4691   RecordDecl *KmpDependInfoRD =
4692       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4693   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4694 
4695   OMPIteratorGeneratorScope IteratorScope(
4696       CGF, cast_or_null<OMPIteratorExpr>(
4697                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4698                                  : nullptr));
4699   for (const Expr *E : Data.DepExprs) {
4700     llvm::Value *Addr;
4701     llvm::Value *Size;
4702     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4703     LValue Base;
4704     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4705       Base = CGF.MakeAddrLValue(
4706           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4707     } else {
4708       LValue &PosLVal = *Pos.get<LValue *>();
4709       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4710       Base = CGF.MakeAddrLValue(
4711           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4712                                         DependenciesArray.getPointer(), Idx),
4713                   DependenciesArray.getAlignment()),
4714           KmpDependInfoTy);
4715     }
4716     // deps[i].base_addr = &<Dependencies[i].second>;
4717     LValue BaseAddrLVal = CGF.EmitLValueForField(
4718         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4719     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4720                           BaseAddrLVal);
4721     // deps[i].len = sizeof(<Dependencies[i].second>);
4722     LValue LenLVal = CGF.EmitLValueForField(
4723         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4724     CGF.EmitStoreOfScalar(Size, LenLVal);
4725     // deps[i].flags = <Dependencies[i].first>;
4726     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4727     LValue FlagsLVal = CGF.EmitLValueForField(
4728         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4729     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4730                           FlagsLVal);
4731     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4732       ++(*P);
4733     } else {
4734       LValue &PosLVal = *Pos.get<LValue *>();
4735       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4736       Idx = CGF.Builder.CreateNUWAdd(Idx,
4737                                      llvm::ConstantInt::get(Idx->getType(), 1));
4738       CGF.EmitStoreOfScalar(Idx, PosLVal);
4739     }
4740   }
4741 }
4742 
4743 static SmallVector<llvm::Value *, 4>
4744 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4745                         const OMPTaskDataTy::DependData &Data) {
4746   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4747          "Expected depobj dependecy kind.");
4748   SmallVector<llvm::Value *, 4> Sizes;
4749   SmallVector<LValue, 4> SizeLVals;
4750   ASTContext &C = CGF.getContext();
4751   QualType FlagsTy;
4752   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4753   RecordDecl *KmpDependInfoRD =
4754       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4755   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4756   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4757   {
4758     OMPIteratorGeneratorScope IteratorScope(
4759         CGF, cast_or_null<OMPIteratorExpr>(
4760                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4761                                    : nullptr));
4762     for (const Expr *E : Data.DepExprs) {
4763       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4764       LValue Base = CGF.EmitLoadOfPointerLValue(
4765           DepobjLVal.getAddress(CGF),
4766           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4767       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4768           Base.getAddress(CGF), KmpDependInfoPtrT);
4769       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4770                                 Base.getTBAAInfo());
4771       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4772           Addr.getElementType(), Addr.getPointer(),
4773           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4774       LValue NumDepsBase = CGF.MakeAddrLValue(
4775           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4776           Base.getBaseInfo(), Base.getTBAAInfo());
4777       // NumDeps = deps[i].base_addr;
4778       LValue BaseAddrLVal = CGF.EmitLValueForField(
4779           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4780       llvm::Value *NumDeps =
4781           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4782       LValue NumLVal = CGF.MakeAddrLValue(
4783           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4784           C.getUIntPtrType());
4785       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4786                               NumLVal.getAddress(CGF));
4787       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4788       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4789       CGF.EmitStoreOfScalar(Add, NumLVal);
4790       SizeLVals.push_back(NumLVal);
4791     }
4792   }
4793   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4794     llvm::Value *Size =
4795         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4796     Sizes.push_back(Size);
4797   }
4798   return Sizes;
4799 }
4800 
4801 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4802                                LValue PosLVal,
4803                                const OMPTaskDataTy::DependData &Data,
4804                                Address DependenciesArray) {
4805   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4806          "Expected depobj dependecy kind.");
4807   ASTContext &C = CGF.getContext();
4808   QualType FlagsTy;
4809   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4810   RecordDecl *KmpDependInfoRD =
4811       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4812   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4813   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4814   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4815   {
4816     OMPIteratorGeneratorScope IteratorScope(
4817         CGF, cast_or_null<OMPIteratorExpr>(
4818                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4819                                    : nullptr));
4820     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4821       const Expr *E = Data.DepExprs[I];
4822       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4823       LValue Base = CGF.EmitLoadOfPointerLValue(
4824           DepobjLVal.getAddress(CGF),
4825           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4826       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4827           Base.getAddress(CGF), KmpDependInfoPtrT);
4828       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4829                                 Base.getTBAAInfo());
4830 
4831       // Get number of elements in a single depobj.
4832       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4833           Addr.getElementType(), Addr.getPointer(),
4834           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4835       LValue NumDepsBase = CGF.MakeAddrLValue(
4836           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4837           Base.getBaseInfo(), Base.getTBAAInfo());
4838       // NumDeps = deps[i].base_addr;
4839       LValue BaseAddrLVal = CGF.EmitLValueForField(
4840           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4841       llvm::Value *NumDeps =
4842           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4843 
4844       // memcopy dependency data.
4845       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4846           ElSize,
4847           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4848       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4849       Address DepAddr =
4850           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4851                                         DependenciesArray.getPointer(), Pos),
4852                   DependenciesArray.getAlignment());
4853       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4854 
4855       // Increase pos.
4856       // pos += size;
4857       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4858       CGF.EmitStoreOfScalar(Add, PosLVal);
4859     }
4860   }
4861 }
4862 
4863 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4864     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4865     SourceLocation Loc) {
4866   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4867         return D.DepExprs.empty();
4868       }))
4869     return std::make_pair(nullptr, Address::invalid());
4870   // Process list of dependencies.
4871   ASTContext &C = CGM.getContext();
4872   Address DependenciesArray = Address::invalid();
4873   llvm::Value *NumOfElements = nullptr;
4874   unsigned NumDependencies = std::accumulate(
4875       Dependencies.begin(), Dependencies.end(), 0,
4876       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4877         return D.DepKind == OMPC_DEPEND_depobj
4878                    ? V
4879                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4880       });
4881   QualType FlagsTy;
4882   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4883   bool HasDepobjDeps = false;
4884   bool HasRegularWithIterators = false;
4885   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4886   llvm::Value *NumOfRegularWithIterators =
4887       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4888   // Calculate number of depobj dependecies and regular deps with the iterators.
4889   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4890     if (D.DepKind == OMPC_DEPEND_depobj) {
4891       SmallVector<llvm::Value *, 4> Sizes =
4892           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4893       for (llvm::Value *Size : Sizes) {
4894         NumOfDepobjElements =
4895             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4896       }
4897       HasDepobjDeps = true;
4898       continue;
4899     }
4900     // Include number of iterations, if any.
4901 
4902     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4903       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4904         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4905         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4906         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4907             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4908         NumOfRegularWithIterators =
4909             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4910       }
4911       HasRegularWithIterators = true;
4912       continue;
4913     }
4914   }
4915 
4916   QualType KmpDependInfoArrayTy;
4917   if (HasDepobjDeps || HasRegularWithIterators) {
4918     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4919                                            /*isSigned=*/false);
4920     if (HasDepobjDeps) {
4921       NumOfElements =
4922           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4923     }
4924     if (HasRegularWithIterators) {
4925       NumOfElements =
4926           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4927     }
4928     auto *OVE = new (C) OpaqueValueExpr(
4929         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4930         VK_PRValue);
4931     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4932                                                   RValue::get(NumOfElements));
4933     KmpDependInfoArrayTy =
4934         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4935                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4936     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4937     // Properly emit variable-sized array.
4938     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4939                                          ImplicitParamDecl::Other);
4940     CGF.EmitVarDecl(*PD);
4941     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4942     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4943                                               /*isSigned=*/false);
4944   } else {
4945     KmpDependInfoArrayTy = C.getConstantArrayType(
4946         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4947         ArrayType::Normal, /*IndexTypeQuals=*/0);
4948     DependenciesArray =
4949         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4950     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4951     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4952                                            /*isSigned=*/false);
4953   }
4954   unsigned Pos = 0;
4955   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4956     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4957         Dependencies[I].IteratorExpr)
4958       continue;
4959     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4960                    DependenciesArray);
4961   }
4962   // Copy regular dependecies with iterators.
4963   LValue PosLVal = CGF.MakeAddrLValue(
4964       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4965   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4966   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4967     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4968         !Dependencies[I].IteratorExpr)
4969       continue;
4970     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4971                    DependenciesArray);
4972   }
4973   // Copy final depobj arrays without iterators.
4974   if (HasDepobjDeps) {
4975     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4976       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4977         continue;
4978       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4979                          DependenciesArray);
4980     }
4981   }
4982   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4983       DependenciesArray, CGF.VoidPtrTy);
4984   return std::make_pair(NumOfElements, DependenciesArray);
4985 }
4986 
4987 Address CGOpenMPRuntime::emitDepobjDependClause(
4988     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4989     SourceLocation Loc) {
4990   if (Dependencies.DepExprs.empty())
4991     return Address::invalid();
4992   // Process list of dependencies.
4993   ASTContext &C = CGM.getContext();
4994   Address DependenciesArray = Address::invalid();
4995   unsigned NumDependencies = Dependencies.DepExprs.size();
4996   QualType FlagsTy;
4997   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4998   RecordDecl *KmpDependInfoRD =
4999       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5000 
5001   llvm::Value *Size;
5002   // Define type kmp_depend_info[<Dependencies.size()>];
5003   // For depobj reserve one extra element to store the number of elements.
5004   // It is required to handle depobj(x) update(in) construct.
5005   // kmp_depend_info[<Dependencies.size()>] deps;
5006   llvm::Value *NumDepsVal;
5007   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
5008   if (const auto *IE =
5009           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
5010     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
5011     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5012       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5013       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
5014       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
5015     }
5016     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
5017                                     NumDepsVal);
5018     CharUnits SizeInBytes =
5019         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
5020     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
5021     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
5022     NumDepsVal =
5023         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
5024   } else {
5025     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5026         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5027         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5028     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5029     Size = CGM.getSize(Sz.alignTo(Align));
5030     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5031   }
5032   // Need to allocate on the dynamic memory.
5033   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5034   // Use default allocator.
5035   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5036   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5037 
5038   llvm::Value *Addr =
5039       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5040                               CGM.getModule(), OMPRTL___kmpc_alloc),
5041                           Args, ".dep.arr.addr");
5042   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5043       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5044   DependenciesArray = Address(Addr, Align);
5045   // Write number of elements in the first element of array for depobj.
5046   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5047   // deps[i].base_addr = NumDependencies;
5048   LValue BaseAddrLVal = CGF.EmitLValueForField(
5049       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5050   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5051   llvm::PointerUnion<unsigned *, LValue *> Pos;
5052   unsigned Idx = 1;
5053   LValue PosLVal;
5054   if (Dependencies.IteratorExpr) {
5055     PosLVal = CGF.MakeAddrLValue(
5056         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5057         C.getSizeType());
5058     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5059                           /*IsInit=*/true);
5060     Pos = &PosLVal;
5061   } else {
5062     Pos = &Idx;
5063   }
5064   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5065   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5066       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5067   return DependenciesArray;
5068 }
5069 
5070 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5071                                         SourceLocation Loc) {
5072   ASTContext &C = CGM.getContext();
5073   QualType FlagsTy;
5074   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5075   LValue Base = CGF.EmitLoadOfPointerLValue(
5076       DepobjLVal.getAddress(CGF),
5077       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5078   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5079   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5080       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5081   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5082       Addr.getElementType(), Addr.getPointer(),
5083       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5084   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5085                                                                CGF.VoidPtrTy);
5086   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5087   // Use default allocator.
5088   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5089   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5090 
5091   // _kmpc_free(gtid, addr, nullptr);
5092   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5093                                 CGM.getModule(), OMPRTL___kmpc_free),
5094                             Args);
5095 }
5096 
5097 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5098                                        OpenMPDependClauseKind NewDepKind,
5099                                        SourceLocation Loc) {
5100   ASTContext &C = CGM.getContext();
5101   QualType FlagsTy;
5102   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5103   RecordDecl *KmpDependInfoRD =
5104       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5105   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5106   llvm::Value *NumDeps;
5107   LValue Base;
5108   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5109 
5110   Address Begin = Base.getAddress(CGF);
5111   // Cast from pointer to array type to pointer to single element.
5112   llvm::Value *End = CGF.Builder.CreateGEP(
5113       Begin.getElementType(), Begin.getPointer(), NumDeps);
5114   // The basic structure here is a while-do loop.
5115   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5116   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5117   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5118   CGF.EmitBlock(BodyBB);
5119   llvm::PHINode *ElementPHI =
5120       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5121   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5122   Begin = Address(ElementPHI, Begin.getAlignment());
5123   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5124                             Base.getTBAAInfo());
5125   // deps[i].flags = NewDepKind;
5126   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5127   LValue FlagsLVal = CGF.EmitLValueForField(
5128       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5129   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5130                         FlagsLVal);
5131 
5132   // Shift the address forward by one element.
5133   Address ElementNext =
5134       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5135   ElementPHI->addIncoming(ElementNext.getPointer(),
5136                           CGF.Builder.GetInsertBlock());
5137   llvm::Value *IsEmpty =
5138       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5139   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5140   // Done.
5141   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5142 }
5143 
5144 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5145                                    const OMPExecutableDirective &D,
5146                                    llvm::Function *TaskFunction,
5147                                    QualType SharedsTy, Address Shareds,
5148                                    const Expr *IfCond,
5149                                    const OMPTaskDataTy &Data) {
5150   if (!CGF.HaveInsertPoint())
5151     return;
5152 
5153   TaskResultTy Result =
5154       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5155   llvm::Value *NewTask = Result.NewTask;
5156   llvm::Function *TaskEntry = Result.TaskEntry;
5157   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5158   LValue TDBase = Result.TDBase;
5159   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5160   // Process list of dependences.
5161   Address DependenciesArray = Address::invalid();
5162   llvm::Value *NumOfElements;
5163   std::tie(NumOfElements, DependenciesArray) =
5164       emitDependClause(CGF, Data.Dependences, Loc);
5165 
5166   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5167   // libcall.
5168   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5169   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5170   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5171   // list is not empty
5172   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5173   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5174   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5175   llvm::Value *DepTaskArgs[7];
5176   if (!Data.Dependences.empty()) {
5177     DepTaskArgs[0] = UpLoc;
5178     DepTaskArgs[1] = ThreadID;
5179     DepTaskArgs[2] = NewTask;
5180     DepTaskArgs[3] = NumOfElements;
5181     DepTaskArgs[4] = DependenciesArray.getPointer();
5182     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5183     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5184   }
5185   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5186                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5187     if (!Data.Tied) {
5188       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5189       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5190       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5191     }
5192     if (!Data.Dependences.empty()) {
5193       CGF.EmitRuntimeCall(
5194           OMPBuilder.getOrCreateRuntimeFunction(
5195               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5196           DepTaskArgs);
5197     } else {
5198       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5199                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5200                           TaskArgs);
5201     }
5202     // Check if parent region is untied and build return for untied task;
5203     if (auto *Region =
5204             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5205       Region->emitUntiedSwitch(CGF);
5206   };
5207 
5208   llvm::Value *DepWaitTaskArgs[6];
5209   if (!Data.Dependences.empty()) {
5210     DepWaitTaskArgs[0] = UpLoc;
5211     DepWaitTaskArgs[1] = ThreadID;
5212     DepWaitTaskArgs[2] = NumOfElements;
5213     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5214     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5215     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5216   }
5217   auto &M = CGM.getModule();
5218   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5219                         TaskEntry, &Data, &DepWaitTaskArgs,
5220                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5221     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5222     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5223     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5224     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5225     // is specified.
5226     if (!Data.Dependences.empty())
5227       CGF.EmitRuntimeCall(
5228           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5229           DepWaitTaskArgs);
5230     // Call proxy_task_entry(gtid, new_task);
5231     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5232                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5233       Action.Enter(CGF);
5234       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5235       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5236                                                           OutlinedFnArgs);
5237     };
5238 
5239     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5240     // kmp_task_t *new_task);
5241     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5242     // kmp_task_t *new_task);
5243     RegionCodeGenTy RCG(CodeGen);
5244     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5245                               M, OMPRTL___kmpc_omp_task_begin_if0),
5246                           TaskArgs,
5247                           OMPBuilder.getOrCreateRuntimeFunction(
5248                               M, OMPRTL___kmpc_omp_task_complete_if0),
5249                           TaskArgs);
5250     RCG.setAction(Action);
5251     RCG(CGF);
5252   };
5253 
5254   if (IfCond) {
5255     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5256   } else {
5257     RegionCodeGenTy ThenRCG(ThenCodeGen);
5258     ThenRCG(CGF);
5259   }
5260 }
5261 
5262 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5263                                        const OMPLoopDirective &D,
5264                                        llvm::Function *TaskFunction,
5265                                        QualType SharedsTy, Address Shareds,
5266                                        const Expr *IfCond,
5267                                        const OMPTaskDataTy &Data) {
5268   if (!CGF.HaveInsertPoint())
5269     return;
5270   TaskResultTy Result =
5271       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5272   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5273   // libcall.
5274   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5275   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5276   // sched, kmp_uint64 grainsize, void *task_dup);
5277   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5278   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5279   llvm::Value *IfVal;
5280   if (IfCond) {
5281     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5282                                       /*isSigned=*/true);
5283   } else {
5284     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5285   }
5286 
5287   LValue LBLVal = CGF.EmitLValueForField(
5288       Result.TDBase,
5289       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5290   const auto *LBVar =
5291       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5292   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5293                        LBLVal.getQuals(),
5294                        /*IsInitializer=*/true);
5295   LValue UBLVal = CGF.EmitLValueForField(
5296       Result.TDBase,
5297       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5298   const auto *UBVar =
5299       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5300   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5301                        UBLVal.getQuals(),
5302                        /*IsInitializer=*/true);
5303   LValue StLVal = CGF.EmitLValueForField(
5304       Result.TDBase,
5305       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5306   const auto *StVar =
5307       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5308   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5309                        StLVal.getQuals(),
5310                        /*IsInitializer=*/true);
5311   // Store reductions address.
5312   LValue RedLVal = CGF.EmitLValueForField(
5313       Result.TDBase,
5314       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5315   if (Data.Reductions) {
5316     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5317   } else {
5318     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5319                                CGF.getContext().VoidPtrTy);
5320   }
5321   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5322   llvm::Value *TaskArgs[] = {
5323       UpLoc,
5324       ThreadID,
5325       Result.NewTask,
5326       IfVal,
5327       LBLVal.getPointer(CGF),
5328       UBLVal.getPointer(CGF),
5329       CGF.EmitLoadOfScalar(StLVal, Loc),
5330       llvm::ConstantInt::getSigned(
5331           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5332       llvm::ConstantInt::getSigned(
5333           CGF.IntTy, Data.Schedule.getPointer()
5334                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5335                          : NoSchedule),
5336       Data.Schedule.getPointer()
5337           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5338                                       /*isSigned=*/false)
5339           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5340       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5341                              Result.TaskDupFn, CGF.VoidPtrTy)
5342                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5343   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5344                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5345                       TaskArgs);
5346 }
5347 
5348 /// Emit reduction operation for each element of array (required for
5349 /// array sections) LHS op = RHS.
5350 /// \param Type Type of array.
5351 /// \param LHSVar Variable on the left side of the reduction operation
5352 /// (references element of array in original variable).
5353 /// \param RHSVar Variable on the right side of the reduction operation
5354 /// (references element of array in original variable).
5355 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5356 /// RHSVar.
5357 static void EmitOMPAggregateReduction(
5358     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5359     const VarDecl *RHSVar,
5360     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5361                                   const Expr *, const Expr *)> &RedOpGen,
5362     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5363     const Expr *UpExpr = nullptr) {
5364   // Perform element-by-element initialization.
5365   QualType ElementTy;
5366   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5367   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5368 
5369   // Drill down to the base element type on both arrays.
5370   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5371   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5372 
5373   llvm::Value *RHSBegin = RHSAddr.getPointer();
5374   llvm::Value *LHSBegin = LHSAddr.getPointer();
5375   // Cast from pointer to array type to pointer to single element.
5376   llvm::Value *LHSEnd =
5377       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5378   // The basic structure here is a while-do loop.
5379   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5380   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5381   llvm::Value *IsEmpty =
5382       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5383   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5384 
5385   // Enter the loop body, making that address the current address.
5386   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5387   CGF.EmitBlock(BodyBB);
5388 
5389   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5390 
5391   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5392       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5393   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5394   Address RHSElementCurrent =
5395       Address(RHSElementPHI,
5396               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5397 
5398   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5399       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5400   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5401   Address LHSElementCurrent =
5402       Address(LHSElementPHI,
5403               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5404 
5405   // Emit copy.
5406   CodeGenFunction::OMPPrivateScope Scope(CGF);
5407   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5408   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5409   Scope.Privatize();
5410   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5411   Scope.ForceCleanup();
5412 
5413   // Shift the address forward by one element.
5414   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5415       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5416       "omp.arraycpy.dest.element");
5417   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5418       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5419       "omp.arraycpy.src.element");
5420   // Check whether we've reached the end.
5421   llvm::Value *Done =
5422       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5423   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5424   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5425   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5426 
5427   // Done.
5428   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5429 }
5430 
5431 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5432 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5433 /// UDR combiner function.
5434 static void emitReductionCombiner(CodeGenFunction &CGF,
5435                                   const Expr *ReductionOp) {
5436   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5437     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5438       if (const auto *DRE =
5439               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5440         if (const auto *DRD =
5441                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5442           std::pair<llvm::Function *, llvm::Function *> Reduction =
5443               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5444           RValue Func = RValue::get(Reduction.first);
5445           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5446           CGF.EmitIgnoredExpr(ReductionOp);
5447           return;
5448         }
5449   CGF.EmitIgnoredExpr(ReductionOp);
5450 }
5451 
5452 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5453     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5454     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5455     ArrayRef<const Expr *> ReductionOps) {
5456   ASTContext &C = CGM.getContext();
5457 
5458   // void reduction_func(void *LHSArg, void *RHSArg);
5459   FunctionArgList Args;
5460   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5461                            ImplicitParamDecl::Other);
5462   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5463                            ImplicitParamDecl::Other);
5464   Args.push_back(&LHSArg);
5465   Args.push_back(&RHSArg);
5466   const auto &CGFI =
5467       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5468   std::string Name = getName({"omp", "reduction", "reduction_func"});
5469   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5470                                     llvm::GlobalValue::InternalLinkage, Name,
5471                                     &CGM.getModule());
5472   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5473   Fn->setDoesNotRecurse();
5474   CodeGenFunction CGF(CGM);
5475   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5476 
5477   // Dst = (void*[n])(LHSArg);
5478   // Src = (void*[n])(RHSArg);
5479   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5480       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5481       ArgsType), CGF.getPointerAlign());
5482   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5483       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5484       ArgsType), CGF.getPointerAlign());
5485 
5486   //  ...
5487   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5488   //  ...
5489   CodeGenFunction::OMPPrivateScope Scope(CGF);
5490   auto IPriv = Privates.begin();
5491   unsigned Idx = 0;
5492   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5493     const auto *RHSVar =
5494         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5495     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5496       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5497     });
5498     const auto *LHSVar =
5499         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5500     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5501       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5502     });
5503     QualType PrivTy = (*IPriv)->getType();
5504     if (PrivTy->isVariablyModifiedType()) {
5505       // Get array size and emit VLA type.
5506       ++Idx;
5507       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5508       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5509       const VariableArrayType *VLA =
5510           CGF.getContext().getAsVariableArrayType(PrivTy);
5511       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5512       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5513           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5514       CGF.EmitVariablyModifiedType(PrivTy);
5515     }
5516   }
5517   Scope.Privatize();
5518   IPriv = Privates.begin();
5519   auto ILHS = LHSExprs.begin();
5520   auto IRHS = RHSExprs.begin();
5521   for (const Expr *E : ReductionOps) {
5522     if ((*IPriv)->getType()->isArrayType()) {
5523       // Emit reduction for array section.
5524       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5525       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5526       EmitOMPAggregateReduction(
5527           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5528           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5529             emitReductionCombiner(CGF, E);
5530           });
5531     } else {
5532       // Emit reduction for array subscript or single variable.
5533       emitReductionCombiner(CGF, E);
5534     }
5535     ++IPriv;
5536     ++ILHS;
5537     ++IRHS;
5538   }
5539   Scope.ForceCleanup();
5540   CGF.FinishFunction();
5541   return Fn;
5542 }
5543 
5544 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5545                                                   const Expr *ReductionOp,
5546                                                   const Expr *PrivateRef,
5547                                                   const DeclRefExpr *LHS,
5548                                                   const DeclRefExpr *RHS) {
5549   if (PrivateRef->getType()->isArrayType()) {
5550     // Emit reduction for array section.
5551     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5552     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5553     EmitOMPAggregateReduction(
5554         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5555         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5556           emitReductionCombiner(CGF, ReductionOp);
5557         });
5558   } else {
5559     // Emit reduction for array subscript or single variable.
5560     emitReductionCombiner(CGF, ReductionOp);
5561   }
5562 }
5563 
/// Emits the code that combines the reduction items at the end of a region.
///
/// \param CGF Function in which the code is emitted; nothing is emitted when
/// it has no insert point.
/// \param Loc Source location used for the runtime calls and emitted helpers.
/// \param Privates One expression per reduction item; its type decides whether
/// the item is treated as an array and whether it is variably modified.
/// \param LHSExprs References to the variables used as left operands in
/// \p ReductionOps (one per item).
/// \param RHSExprs References to the variables used as right operands in
/// \p ReductionOps (one per item); their addresses populate the reduction
/// list.
/// \param ReductionOps Combiner expression for each item.
/// \param Options \c SimpleReduction selects the inline-only form without any
/// runtime call; \c WithNowait selects the *_nowait runtime entry points.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  // Simple form: emit the combiners inline and return; no reduction list,
  // helper function, lock or runtime call is produced.
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list has one slot per RHS expression plus one extra slot for every
  // private whose type is variably modified.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it runs ahead of I whenever an extra size slot
  // has been written.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    // Store the address of the RHS item as 'void *'.
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is converted to size_t and stored in the next slot
      // as an integer-to-pointer value. (This local 'Size' shadows the slot
      // count declared above.)
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is RHSExprs.size(), i.e. it does not count the extra size
  // slots, whereas sizeof(RedList) is the size of the whole array type.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Any result other than 1 or 2 goes straight to the default block.
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: the same inline combiners as the simple form.
  // The Action parameter is not referenced by the lambda body.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The end-of-reduction runtime call is supplied through the action attached
  // to the region (no enter callee is given, only the end_reduce one).
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body for case 2. For each item: if the combiner is an assignment
  // 'x = <update>', emit it through EmitOMPAtomicSimpleUpdateExpr; otherwise
  // emit the combiner inside a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr:  left-hand side of the assignment.
      // UpExpr: right-hand side of the assignment (the whole update).
      // EExpr:  right operand of the binary operator found in the update.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // Stays BO_Comma unless a binary operator is found in the update below.
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer 'BO' declared in this condition shadows the
      // BinaryOperatorKind 'BO' above for the extent of the if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits one atomic update of XExpr. The callback handed to
        // EmitOMPAtomicSimpleUpdateExpr stores the value it receives into a
        // temporary, maps VD to that temporary and evaluates UpExpr.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The combiner is not an assignment, so it is wrapped in a critical
        // region named via getName({"atomic_reduction"}).
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per element.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Only the non-nowait form attaches __kmpc_end_reduce to case 2. The region
  // is invoked inside the branch so that Action is still alive during the call.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  // Both cases fall through to the default block, where emission continues.
  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5870 
5871 /// Generates unique name for artificial threadprivate variables.
5872 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5873 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5874                                       const Expr *Ref) {
5875   SmallString<256> Buffer;
5876   llvm::raw_svector_ostream Out(Buffer);
5877   const clang::DeclRefExpr *DE;
5878   const VarDecl *D = ::getBaseDecl(Ref, DE);
5879   if (!D)
5880     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5881   D = D->getCanonicalDecl();
5882   std::string Name = CGM.getOpenMPRuntime().getName(
5883       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5884   Out << Prefix << Name << "_"
5885       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5886   return std::string(Out.str());
5887 }
5888 
5889 /// Emits reduction initializer function:
5890 /// \code
5891 /// void @.red_init(void* %arg, void* %orig) {
5892 /// %0 = bitcast void* %arg to <type>*
5893 /// store <type> <init>, <type>* %0
5894 /// ret void
5895 /// }
5896 /// \endcode
5897 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5898                                            SourceLocation Loc,
5899                                            ReductionCodeGen &RCG, unsigned N) {
5900   ASTContext &C = CGM.getContext();
5901   QualType VoidPtrTy = C.VoidPtrTy;
5902   VoidPtrTy.addRestrict();
5903   FunctionArgList Args;
5904   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5905                           ImplicitParamDecl::Other);
5906   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5907                               ImplicitParamDecl::Other);
5908   Args.emplace_back(&Param);
5909   Args.emplace_back(&ParamOrig);
5910   const auto &FnInfo =
5911       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5912   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5913   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5914   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5915                                     Name, &CGM.getModule());
5916   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5917   Fn->setDoesNotRecurse();
5918   CodeGenFunction CGF(CGM);
5919   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5920   Address PrivateAddr = CGF.EmitLoadOfPointer(
5921       CGF.GetAddrOfLocalVar(&Param),
5922       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5923   llvm::Value *Size = nullptr;
5924   // If the size of the reduction item is non-constant, load it from global
5925   // threadprivate variable.
5926   if (RCG.getSizes(N).second) {
5927     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5928         CGF, CGM.getContext().getSizeType(),
5929         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5930     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5931                                 CGM.getContext().getSizeType(), Loc);
5932   }
5933   RCG.emitAggregateType(CGF, N, Size);
5934   LValue OrigLVal;
5935   // If initializer uses initializer from declare reduction construct, emit a
5936   // pointer to the address of the original reduction item (reuired by reduction
5937   // initializer)
5938   if (RCG.usesReductionInitializer(N)) {
5939     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5940     SharedAddr = CGF.EmitLoadOfPointer(
5941         SharedAddr,
5942         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5943     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5944   } else {
5945     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5946         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5947         CGM.getContext().VoidPtrTy);
5948   }
5949   // Emit the initializer:
5950   // %0 = bitcast void* %arg to <type>*
5951   // store <type> <init>, <type>* %0
5952   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5953                          [](CodeGenFunction &) { return false; });
5954   CGF.FinishFunction();
5955   return Fn;
5956 }
5957 
5958 /// Emits reduction combiner function:
5959 /// \code
5960 /// void @.red_comb(void* %arg0, void* %arg1) {
5961 /// %lhs = bitcast void* %arg0 to <type>*
5962 /// %rhs = bitcast void* %arg1 to <type>*
5963 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5964 /// store <type> %2, <type>* %lhs
5965 /// ret void
5966 /// }
5967 /// \endcode
5968 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5969                                            SourceLocation Loc,
5970                                            ReductionCodeGen &RCG, unsigned N,
5971                                            const Expr *ReductionOp,
5972                                            const Expr *LHS, const Expr *RHS,
5973                                            const Expr *PrivateRef) {
5974   ASTContext &C = CGM.getContext();
5975   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5976   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5977   FunctionArgList Args;
5978   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5979                                C.VoidPtrTy, ImplicitParamDecl::Other);
5980   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5981                             ImplicitParamDecl::Other);
5982   Args.emplace_back(&ParamInOut);
5983   Args.emplace_back(&ParamIn);
5984   const auto &FnInfo =
5985       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5986   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5987   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5988   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5989                                     Name, &CGM.getModule());
5990   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5991   Fn->setDoesNotRecurse();
5992   CodeGenFunction CGF(CGM);
5993   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5994   llvm::Value *Size = nullptr;
5995   // If the size of the reduction item is non-constant, load it from global
5996   // threadprivate variable.
5997   if (RCG.getSizes(N).second) {
5998     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5999         CGF, CGM.getContext().getSizeType(),
6000         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6001     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6002                                 CGM.getContext().getSizeType(), Loc);
6003   }
6004   RCG.emitAggregateType(CGF, N, Size);
6005   // Remap lhs and rhs variables to the addresses of the function arguments.
6006   // %lhs = bitcast void* %arg0 to <type>*
6007   // %rhs = bitcast void* %arg1 to <type>*
6008   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6009   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6010     // Pull out the pointer to the variable.
6011     Address PtrAddr = CGF.EmitLoadOfPointer(
6012         CGF.GetAddrOfLocalVar(&ParamInOut),
6013         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6014     return CGF.Builder.CreateElementBitCast(
6015         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6016   });
6017   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6018     // Pull out the pointer to the variable.
6019     Address PtrAddr = CGF.EmitLoadOfPointer(
6020         CGF.GetAddrOfLocalVar(&ParamIn),
6021         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6022     return CGF.Builder.CreateElementBitCast(
6023         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6024   });
6025   PrivateScope.Privatize();
6026   // Emit the combiner body:
6027   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6028   // store <type> %2, <type>* %lhs
6029   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6030       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6031       cast<DeclRefExpr>(RHS));
6032   CGF.FinishFunction();
6033   return Fn;
6034 }
6035 
6036 /// Emits reduction finalizer function:
6037 /// \code
6038 /// void @.red_fini(void* %arg) {
6039 /// %0 = bitcast void* %arg to <type>*
6040 /// <destroy>(<type>* %0)
6041 /// ret void
6042 /// }
6043 /// \endcode
6044 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6045                                            SourceLocation Loc,
6046                                            ReductionCodeGen &RCG, unsigned N) {
6047   if (!RCG.needCleanups(N))
6048     return nullptr;
6049   ASTContext &C = CGM.getContext();
6050   FunctionArgList Args;
6051   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6052                           ImplicitParamDecl::Other);
6053   Args.emplace_back(&Param);
6054   const auto &FnInfo =
6055       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6056   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6057   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6058   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6059                                     Name, &CGM.getModule());
6060   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6061   Fn->setDoesNotRecurse();
6062   CodeGenFunction CGF(CGM);
6063   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6064   Address PrivateAddr = CGF.EmitLoadOfPointer(
6065       CGF.GetAddrOfLocalVar(&Param),
6066       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6067   llvm::Value *Size = nullptr;
6068   // If the size of the reduction item is non-constant, load it from global
6069   // threadprivate variable.
6070   if (RCG.getSizes(N).second) {
6071     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6072         CGF, CGM.getContext().getSizeType(),
6073         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6074     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6075                                 CGM.getContext().getSizeType(), Loc);
6076   }
6077   RCG.emitAggregateType(CGF, N, Size);
6078   // Emit the finalizer body:
6079   // <destroy>(<type>* %0)
6080   RCG.emitCleanups(CGF, N, PrivateAddr);
6081   CGF.FinishFunction(Loc);
6082   return Fn;
6083 }
6084 
6085 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6086     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6087     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6088   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6089     return nullptr;
6090 
6091   // Build typedef struct:
6092   // kmp_taskred_input {
6093   //   void *reduce_shar; // shared reduction item
6094   //   void *reduce_orig; // original reduction item used for initialization
6095   //   size_t reduce_size; // size of data item
6096   //   void *reduce_init; // data initialization routine
6097   //   void *reduce_fini; // data finalization routine
6098   //   void *reduce_comb; // data combiner routine
6099   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6100   // } kmp_taskred_input_t;
6101   ASTContext &C = CGM.getContext();
6102   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6103   RD->startDefinition();
6104   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6105   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6106   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6107   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6108   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6109   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6110   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6111       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6112   RD->completeDefinition();
6113   QualType RDType = C.getRecordType(RD);
6114   unsigned Size = Data.ReductionVars.size();
6115   llvm::APInt ArraySize(/*numBits=*/64, Size);
6116   QualType ArrayRDType = C.getConstantArrayType(
6117       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6118   // kmp_task_red_input_t .rd_input.[Size];
6119   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6120   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6121                        Data.ReductionCopies, Data.ReductionOps);
6122   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6123     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6124     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6125                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6126     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6127         TaskRedInput.getPointer(), Idxs,
6128         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6129         ".rd_input.gep.");
6130     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6131     // ElemLVal.reduce_shar = &Shareds[Cnt];
6132     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6133     RCG.emitSharedOrigLValue(CGF, Cnt);
6134     llvm::Value *CastedShared =
6135         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6136     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6137     // ElemLVal.reduce_orig = &Origs[Cnt];
6138     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6139     llvm::Value *CastedOrig =
6140         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6141     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6142     RCG.emitAggregateType(CGF, Cnt);
6143     llvm::Value *SizeValInChars;
6144     llvm::Value *SizeVal;
6145     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6146     // We use delayed creation/initialization for VLAs and array sections. It is
6147     // required because runtime does not provide the way to pass the sizes of
6148     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6149     // threadprivate global variables are used to store these values and use
6150     // them in the functions.
6151     bool DelayedCreation = !!SizeVal;
6152     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6153                                                /*isSigned=*/false);
6154     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6155     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6156     // ElemLVal.reduce_init = init;
6157     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6158     llvm::Value *InitAddr =
6159         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6160     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6161     // ElemLVal.reduce_fini = fini;
6162     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6163     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6164     llvm::Value *FiniAddr = Fini
6165                                 ? CGF.EmitCastToVoidPtr(Fini)
6166                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6167     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6168     // ElemLVal.reduce_comb = comb;
6169     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6170     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6171         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6172         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6173     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6174     // ElemLVal.flags = 0;
6175     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6176     if (DelayedCreation) {
6177       CGF.EmitStoreOfScalar(
6178           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6179           FlagsLVal);
6180     } else
6181       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6182                                  FlagsLVal.getType());
6183   }
6184   if (Data.IsReductionWithTaskMod) {
6185     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6186     // is_ws, int num, void *data);
6187     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6188     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6189                                                   CGM.IntTy, /*isSigned=*/true);
6190     llvm::Value *Args[] = {
6191         IdentTLoc, GTid,
6192         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6193                                /*isSigned=*/true),
6194         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6195         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6196             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6197     return CGF.EmitRuntimeCall(
6198         OMPBuilder.getOrCreateRuntimeFunction(
6199             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6200         Args);
6201   }
6202   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6203   llvm::Value *Args[] = {
6204       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6205                                 /*isSigned=*/true),
6206       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6207       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6208                                                       CGM.VoidPtrTy)};
6209   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6210                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6211                              Args);
6212 }
6213 
6214 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6215                                             SourceLocation Loc,
6216                                             bool IsWorksharingReduction) {
6217   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6218   // is_ws, int num, void *data);
6219   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6220   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6221                                                 CGM.IntTy, /*isSigned=*/true);
6222   llvm::Value *Args[] = {IdentTLoc, GTid,
6223                          llvm::ConstantInt::get(CGM.IntTy,
6224                                                 IsWorksharingReduction ? 1 : 0,
6225                                                 /*isSigned=*/true)};
6226   (void)CGF.EmitRuntimeCall(
6227       OMPBuilder.getOrCreateRuntimeFunction(
6228           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6229       Args);
6230 }
6231 
6232 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6233                                               SourceLocation Loc,
6234                                               ReductionCodeGen &RCG,
6235                                               unsigned N) {
6236   auto Sizes = RCG.getSizes(N);
6237   // Emit threadprivate global variable if the type is non-constant
6238   // (Sizes.second = nullptr).
6239   if (Sizes.second) {
6240     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6241                                                      /*isSigned=*/false);
6242     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6243         CGF, CGM.getContext().getSizeType(),
6244         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6245     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6246   }
6247 }
6248 
6249 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6250                                               SourceLocation Loc,
6251                                               llvm::Value *ReductionsPtr,
6252                                               LValue SharedLVal) {
6253   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6254   // *d);
6255   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6256                                                    CGM.IntTy,
6257                                                    /*isSigned=*/true),
6258                          ReductionsPtr,
6259                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6260                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6261   return Address(
6262       CGF.EmitRuntimeCall(
6263           OMPBuilder.getOrCreateRuntimeFunction(
6264               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6265           Args),
6266       SharedLVal.getAlignment());
6267 }
6268 
6269 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6270                                        const OMPTaskDataTy &Data) {
6271   if (!CGF.HaveInsertPoint())
6272     return;
6273 
6274   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6275     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6276     OMPBuilder.createTaskwait(CGF.Builder);
6277   } else {
6278     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6279     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6280     auto &M = CGM.getModule();
6281     Address DependenciesArray = Address::invalid();
6282     llvm::Value *NumOfElements;
6283     std::tie(NumOfElements, DependenciesArray) =
6284         emitDependClause(CGF, Data.Dependences, Loc);
6285     llvm::Value *DepWaitTaskArgs[6];
6286     if (!Data.Dependences.empty()) {
6287       DepWaitTaskArgs[0] = UpLoc;
6288       DepWaitTaskArgs[1] = ThreadID;
6289       DepWaitTaskArgs[2] = NumOfElements;
6290       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6291       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6292       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6293 
6294       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6295 
6296       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6297       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6298       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6299       // is specified.
6300       CGF.EmitRuntimeCall(
6301           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6302           DepWaitTaskArgs);
6303 
6304     } else {
6305 
6306       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6307       // global_tid);
6308       llvm::Value *Args[] = {UpLoc, ThreadID};
6309       // Ignore return result until untied tasks are supported.
6310       CGF.EmitRuntimeCall(
6311           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6312           Args);
6313     }
6314   }
6315 
6316   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6317     Region->emitUntiedSwitch(CGF);
6318 }
6319 
6320 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6321                                            OpenMPDirectiveKind InnerKind,
6322                                            const RegionCodeGenTy &CodeGen,
6323                                            bool HasCancel) {
6324   if (!CGF.HaveInsertPoint())
6325     return;
6326   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6327                                  InnerKind != OMPD_critical &&
6328                                      InnerKind != OMPD_master &&
6329                                      InnerKind != OMPD_masked);
6330   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6331 }
6332 
namespace {
/// Integer codes for the kind of construct being cancelled. The selected
/// value is emitted as the 'cncl_kind' argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls, so the explicit numbering must not
/// be changed.
enum RTCancelKind {
  /// No concrete construct kind.
  CancelNoreq = 0,
  /// Used for the 'parallel' region.
  CancelParallel = 1,
  /// Used for the 'for' region.
  CancelLoop = 2,
  /// Used for the 'sections' region.
  CancelSections = 3,
  /// Used for the 'taskgroup' region.
  CancelTaskgroup = 4
};
} // anonymous namespace
6342 
6343 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6344   RTCancelKind CancelKind = CancelNoreq;
6345   if (CancelRegion == OMPD_parallel)
6346     CancelKind = CancelParallel;
6347   else if (CancelRegion == OMPD_for)
6348     CancelKind = CancelLoop;
6349   else if (CancelRegion == OMPD_sections)
6350     CancelKind = CancelSections;
6351   else {
6352     assert(CancelRegion == OMPD_taskgroup);
6353     CancelKind = CancelTaskgroup;
6354   }
6355   return CancelKind;
6356 }
6357 
6358 void CGOpenMPRuntime::emitCancellationPointCall(
6359     CodeGenFunction &CGF, SourceLocation Loc,
6360     OpenMPDirectiveKind CancelRegion) {
6361   if (!CGF.HaveInsertPoint())
6362     return;
6363   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6364   // global_tid, kmp_int32 cncl_kind);
6365   if (auto *OMPRegionInfo =
6366           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6367     // For 'cancellation point taskgroup', the task region info may not have a
6368     // cancel. This may instead happen in another adjacent task.
6369     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6370       llvm::Value *Args[] = {
6371           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6372           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6373       // Ignore return result until untied tasks are supported.
6374       llvm::Value *Result = CGF.EmitRuntimeCall(
6375           OMPBuilder.getOrCreateRuntimeFunction(
6376               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6377           Args);
6378       // if (__kmpc_cancellationpoint()) {
6379       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6380       //   exit from construct;
6381       // }
6382       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6383       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6384       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6385       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6386       CGF.EmitBlock(ExitBB);
6387       if (CancelRegion == OMPD_parallel)
6388         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6389       // exit from construct;
6390       CodeGenFunction::JumpDest CancelDest =
6391           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6392       CGF.EmitBranchThroughCleanup(CancelDest);
6393       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6394     }
6395   }
6396 }
6397 
6398 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6399                                      const Expr *IfCond,
6400                                      OpenMPDirectiveKind CancelRegion) {
6401   if (!CGF.HaveInsertPoint())
6402     return;
6403   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6404   // kmp_int32 cncl_kind);
6405   auto &M = CGM.getModule();
6406   if (auto *OMPRegionInfo =
6407           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6408     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6409                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6410       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6411       llvm::Value *Args[] = {
6412           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6413           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6414       // Ignore return result until untied tasks are supported.
6415       llvm::Value *Result = CGF.EmitRuntimeCall(
6416           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6417       // if (__kmpc_cancel()) {
6418       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6419       //   exit from construct;
6420       // }
6421       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6422       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6423       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6424       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6425       CGF.EmitBlock(ExitBB);
6426       if (CancelRegion == OMPD_parallel)
6427         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6428       // exit from construct;
6429       CodeGenFunction::JumpDest CancelDest =
6430           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6431       CGF.EmitBranchThroughCleanup(CancelDest);
6432       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6433     };
6434     if (IfCond) {
6435       emitIfClause(CGF, IfCond, ThenGen,
6436                    [](CodeGenFunction &, PrePostActionTy &) {});
6437     } else {
6438       RegionCodeGenTy ThenRCG(ThenGen);
6439       ThenRCG(CGF);
6440     }
6441   }
6442 }
6443 
6444 namespace {
6445 /// Cleanup action for uses_allocators support.
6446 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6447   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6448 
6449 public:
6450   OMPUsesAllocatorsActionTy(
6451       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6452       : Allocators(Allocators) {}
6453   void Enter(CodeGenFunction &CGF) override {
6454     if (!CGF.HaveInsertPoint())
6455       return;
6456     for (const auto &AllocatorData : Allocators) {
6457       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6458           CGF, AllocatorData.first, AllocatorData.second);
6459     }
6460   }
6461   void Exit(CodeGenFunction &CGF) override {
6462     if (!CGF.HaveInsertPoint())
6463       return;
6464     for (const auto &AllocatorData : Allocators) {
6465       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6466                                                         AllocatorData.first);
6467     }
6468   }
6469 };
6470 } // namespace
6471 
6472 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6473     const OMPExecutableDirective &D, StringRef ParentName,
6474     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6475     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6476   assert(!ParentName.empty() && "Invalid target region parent name!");
6477   HasEmittedTargetRegion = true;
6478   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6479   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6480     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6481       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6482       if (!D.AllocatorTraits)
6483         continue;
6484       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6485     }
6486   }
6487   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6488   CodeGen.setAction(UsesAllocatorAction);
6489   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6490                                    IsOffloadEntry, CodeGen);
6491 }
6492 
6493 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6494                                              const Expr *Allocator,
6495                                              const Expr *AllocatorTraits) {
6496   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6497   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6498   // Use default memspace handle.
6499   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6500   llvm::Value *NumTraits = llvm::ConstantInt::get(
6501       CGF.IntTy, cast<ConstantArrayType>(
6502                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6503                      ->getSize()
6504                      .getLimitedValue());
6505   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6506   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6507       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6508   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6509                                            AllocatorTraitsLVal.getBaseInfo(),
6510                                            AllocatorTraitsLVal.getTBAAInfo());
6511   llvm::Value *Traits =
6512       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6513 
6514   llvm::Value *AllocatorVal =
6515       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6516                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6517                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6518   // Store to allocator.
6519   CGF.EmitVarDecl(*cast<VarDecl>(
6520       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6521   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6522   AllocatorVal =
6523       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6524                                Allocator->getType(), Allocator->getExprLoc());
6525   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6526 }
6527 
6528 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6529                                              const Expr *Allocator) {
6530   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6531   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6532   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6533   llvm::Value *AllocatorVal =
6534       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6535   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6536                                           CGF.getContext().VoidPtrTy,
6537                                           Allocator->getExprLoc());
6538   (void)CGF.EmitRuntimeCall(
6539       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6540                                             OMPRTL___kmpc_destroy_allocator),
6541       {ThreadId, AllocatorVal});
6542 }
6543 
6544 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6545     const OMPExecutableDirective &D, StringRef ParentName,
6546     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6547     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6548   // Create a unique name for the entry function using the source location
6549   // information of the current target region. The name will be something like:
6550   //
6551   // __omp_offloading_DD_FFFF_PP_lBB
6552   //
6553   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6554   // mangled name of the function that encloses the target region and BB is the
6555   // line number of the target region.
6556 
6557   unsigned DeviceID;
6558   unsigned FileID;
6559   unsigned Line;
6560   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6561                            Line);
6562   SmallString<64> EntryFnName;
6563   {
6564     llvm::raw_svector_ostream OS(EntryFnName);
6565     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6566        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6567   }
6568 
6569   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6570 
6571   CodeGenFunction CGF(CGM, true);
6572   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6573   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6574 
6575   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6576 
6577   // If this target outline function is not an offload entry, we don't need to
6578   // register it.
6579   if (!IsOffloadEntry)
6580     return;
6581 
6582   // The target region ID is used by the runtime library to identify the current
6583   // target region, so it only has to be unique and not necessarily point to
6584   // anything. It could be the pointer to the outlined function that implements
6585   // the target region, but we aren't using that so that the compiler doesn't
6586   // need to keep that, and could therefore inline the host function if proven
6587   // worthwhile during optimization. In the other hand, if emitting code for the
6588   // device, the ID has to be the function address so that it can retrieved from
6589   // the offloading entry and launched by the runtime library. We also mark the
6590   // outlined function to have external linkage in case we are emitting code for
6591   // the device, because these functions will be entry points to the device.
6592 
6593   if (CGM.getLangOpts().OpenMPIsDevice) {
6594     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6595     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6596     OutlinedFn->setDSOLocal(false);
6597     if (CGM.getTriple().isAMDGCN())
6598       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6599   } else {
6600     std::string Name = getName({EntryFnName, "region_id"});
6601     OutlinedFnID = new llvm::GlobalVariable(
6602         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6603         llvm::GlobalValue::WeakAnyLinkage,
6604         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6605   }
6606 
6607   // Register the information for the entry associated with this target region.
6608   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6609       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6610       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6611 
6612   // Add NumTeams and ThreadLimit attributes to the outlined GPU function
6613   int32_t DefaultValTeams = -1;
6614   getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6615   if (DefaultValTeams > 0) {
6616     OutlinedFn->addFnAttr("omp_target_num_teams",
6617                           std::to_string(DefaultValTeams));
6618   }
6619   int32_t DefaultValThreads = -1;
6620   getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6621   if (DefaultValThreads > 0) {
6622     OutlinedFn->addFnAttr("omp_target_thread_limit",
6623                           std::to_string(DefaultValThreads));
6624   }
6625 
6626   CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6627 }
6628 
6629 /// Checks if the expression is constant or does not have non-trivial function
6630 /// calls.
6631 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6632   // We can skip constant expressions.
6633   // We can skip expressions with trivial calls or simple expressions.
6634   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6635           !E->hasNonTrivialCall(Ctx)) &&
6636          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6637 }
6638 
6639 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6640                                                     const Stmt *Body) {
6641   const Stmt *Child = Body->IgnoreContainers();
6642   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6643     Child = nullptr;
6644     for (const Stmt *S : C->body()) {
6645       if (const auto *E = dyn_cast<Expr>(S)) {
6646         if (isTrivial(Ctx, E))
6647           continue;
6648       }
6649       // Some of the statements can be ignored.
6650       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6651           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6652         continue;
6653       // Analyze declarations.
6654       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6655         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6656               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6657                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6658                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6659                   isa<UsingDirectiveDecl>(D) ||
6660                   isa<OMPDeclareReductionDecl>(D) ||
6661                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6662                 return true;
6663               const auto *VD = dyn_cast<VarDecl>(D);
6664               if (!VD)
6665                 return false;
6666               return VD->hasGlobalStorage() || !VD->isUsed();
6667             }))
6668           continue;
6669       }
6670       // Found multiple children - cannot get the one child only.
6671       if (Child)
6672         return nullptr;
6673       Child = S;
6674     }
6675     if (Child)
6676       Child = Child->IgnoreContainers();
6677   }
6678   return Child;
6679 }
6680 
6681 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6682     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6683     int32_t &DefaultVal) {
6684 
6685   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6686   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6687          "Expected target-based executable directive.");
6688   switch (DirectiveKind) {
6689   case OMPD_target: {
6690     const auto *CS = D.getInnermostCapturedStmt();
6691     const auto *Body =
6692         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6693     const Stmt *ChildStmt =
6694         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6695     if (const auto *NestedDir =
6696             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6697       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6698         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6699           const Expr *NumTeams =
6700               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6701           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6702             if (auto Constant =
6703                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6704               DefaultVal = Constant->getExtValue();
6705           return NumTeams;
6706         }
6707         DefaultVal = 0;
6708         return nullptr;
6709       }
6710       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6711           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6712         DefaultVal = 1;
6713         return nullptr;
6714       }
6715       DefaultVal = 1;
6716       return nullptr;
6717     }
6718     // A value of -1 is used to check if we need to emit no teams region
6719     DefaultVal = -1;
6720     return nullptr;
6721   }
6722   case OMPD_target_teams:
6723   case OMPD_target_teams_distribute:
6724   case OMPD_target_teams_distribute_simd:
6725   case OMPD_target_teams_distribute_parallel_for:
6726   case OMPD_target_teams_distribute_parallel_for_simd: {
6727     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6728       const Expr *NumTeams =
6729           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6730       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6731         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6732           DefaultVal = Constant->getExtValue();
6733       return NumTeams;
6734     }
6735     DefaultVal = 0;
6736     return nullptr;
6737   }
6738   case OMPD_target_parallel:
6739   case OMPD_target_parallel_for:
6740   case OMPD_target_parallel_for_simd:
6741   case OMPD_target_simd:
6742     DefaultVal = 1;
6743     return nullptr;
6744   case OMPD_parallel:
6745   case OMPD_for:
6746   case OMPD_parallel_for:
6747   case OMPD_parallel_master:
6748   case OMPD_parallel_sections:
6749   case OMPD_for_simd:
6750   case OMPD_parallel_for_simd:
6751   case OMPD_cancel:
6752   case OMPD_cancellation_point:
6753   case OMPD_ordered:
6754   case OMPD_threadprivate:
6755   case OMPD_allocate:
6756   case OMPD_task:
6757   case OMPD_simd:
6758   case OMPD_tile:
6759   case OMPD_unroll:
6760   case OMPD_sections:
6761   case OMPD_section:
6762   case OMPD_single:
6763   case OMPD_master:
6764   case OMPD_critical:
6765   case OMPD_taskyield:
6766   case OMPD_barrier:
6767   case OMPD_taskwait:
6768   case OMPD_taskgroup:
6769   case OMPD_atomic:
6770   case OMPD_flush:
6771   case OMPD_depobj:
6772   case OMPD_scan:
6773   case OMPD_teams:
6774   case OMPD_target_data:
6775   case OMPD_target_exit_data:
6776   case OMPD_target_enter_data:
6777   case OMPD_distribute:
6778   case OMPD_distribute_simd:
6779   case OMPD_distribute_parallel_for:
6780   case OMPD_distribute_parallel_for_simd:
6781   case OMPD_teams_distribute:
6782   case OMPD_teams_distribute_simd:
6783   case OMPD_teams_distribute_parallel_for:
6784   case OMPD_teams_distribute_parallel_for_simd:
6785   case OMPD_target_update:
6786   case OMPD_declare_simd:
6787   case OMPD_declare_variant:
6788   case OMPD_begin_declare_variant:
6789   case OMPD_end_declare_variant:
6790   case OMPD_declare_target:
6791   case OMPD_end_declare_target:
6792   case OMPD_declare_reduction:
6793   case OMPD_declare_mapper:
6794   case OMPD_taskloop:
6795   case OMPD_taskloop_simd:
6796   case OMPD_master_taskloop:
6797   case OMPD_master_taskloop_simd:
6798   case OMPD_parallel_master_taskloop:
6799   case OMPD_parallel_master_taskloop_simd:
6800   case OMPD_requires:
6801   case OMPD_metadirective:
6802   case OMPD_unknown:
6803     break;
6804   default:
6805     break;
6806   }
6807   llvm_unreachable("Unexpected directive kind.");
6808 }
6809 
6810 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6811     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6812   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6813          "Clauses associated with the teams directive expected to be emitted "
6814          "only for the host!");
6815   CGBuilderTy &Bld = CGF.Builder;
6816   int32_t DefaultNT = -1;
6817   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6818   if (NumTeams != nullptr) {
6819     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6820 
6821     switch (DirectiveKind) {
6822     case OMPD_target: {
6823       const auto *CS = D.getInnermostCapturedStmt();
6824       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6825       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6826       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6827                                                   /*IgnoreResultAssign*/ true);
6828       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6829                              /*isSigned=*/true);
6830     }
6831     case OMPD_target_teams:
6832     case OMPD_target_teams_distribute:
6833     case OMPD_target_teams_distribute_simd:
6834     case OMPD_target_teams_distribute_parallel_for:
6835     case OMPD_target_teams_distribute_parallel_for_simd: {
6836       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6837       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6838                                                   /*IgnoreResultAssign*/ true);
6839       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6840                              /*isSigned=*/true);
6841     }
6842     default:
6843       break;
6844     }
6845   } else if (DefaultNT == -1) {
6846     return nullptr;
6847   }
6848 
6849   return Bld.getInt32(DefaultNT);
6850 }
6851 
6852 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6853                                   llvm::Value *DefaultThreadLimitVal) {
6854   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6855       CGF.getContext(), CS->getCapturedStmt());
6856   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6857     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6858       llvm::Value *NumThreads = nullptr;
6859       llvm::Value *CondVal = nullptr;
6860       // Handle if clause. If if clause present, the number of threads is
6861       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6862       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6863         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6864         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6865         const OMPIfClause *IfClause = nullptr;
6866         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6867           if (C->getNameModifier() == OMPD_unknown ||
6868               C->getNameModifier() == OMPD_parallel) {
6869             IfClause = C;
6870             break;
6871           }
6872         }
6873         if (IfClause) {
6874           const Expr *Cond = IfClause->getCondition();
6875           bool Result;
6876           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6877             if (!Result)
6878               return CGF.Builder.getInt32(1);
6879           } else {
6880             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6881             if (const auto *PreInit =
6882                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6883               for (const auto *I : PreInit->decls()) {
6884                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6885                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6886                 } else {
6887                   CodeGenFunction::AutoVarEmission Emission =
6888                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6889                   CGF.EmitAutoVarCleanups(Emission);
6890                 }
6891               }
6892             }
6893             CondVal = CGF.EvaluateExprAsBool(Cond);
6894           }
6895         }
6896       }
6897       // Check the value of num_threads clause iff if clause was not specified
6898       // or is not evaluated to false.
6899       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6900         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6901         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6902         const auto *NumThreadsClause =
6903             Dir->getSingleClause<OMPNumThreadsClause>();
6904         CodeGenFunction::LexicalScope Scope(
6905             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6906         if (const auto *PreInit =
6907                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6908           for (const auto *I : PreInit->decls()) {
6909             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6910               CGF.EmitVarDecl(cast<VarDecl>(*I));
6911             } else {
6912               CodeGenFunction::AutoVarEmission Emission =
6913                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6914               CGF.EmitAutoVarCleanups(Emission);
6915             }
6916           }
6917         }
6918         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6919         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6920                                                /*isSigned=*/false);
6921         if (DefaultThreadLimitVal)
6922           NumThreads = CGF.Builder.CreateSelect(
6923               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6924               DefaultThreadLimitVal, NumThreads);
6925       } else {
6926         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6927                                            : CGF.Builder.getInt32(0);
6928       }
6929       // Process condition of the if clause.
6930       if (CondVal) {
6931         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6932                                               CGF.Builder.getInt32(1));
6933       }
6934       return NumThreads;
6935     }
6936     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6937       return CGF.Builder.getInt32(1);
6938     return DefaultThreadLimitVal;
6939   }
6940   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6941                                : CGF.Builder.getInt32(0);
6942 }
6943 
6944 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6945     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6946     int32_t &DefaultVal) {
6947   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6948   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6949          "Expected target-based executable directive.");
6950 
6951   switch (DirectiveKind) {
6952   case OMPD_target:
6953     // Teams have no clause thread_limit
6954     return nullptr;
6955   case OMPD_target_teams:
6956   case OMPD_target_teams_distribute:
6957     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6958       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6959       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6960       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6961         if (auto Constant =
6962                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6963           DefaultVal = Constant->getExtValue();
6964       return ThreadLimit;
6965     }
6966     return nullptr;
6967   case OMPD_target_parallel:
6968   case OMPD_target_parallel_for:
6969   case OMPD_target_parallel_for_simd:
6970   case OMPD_target_teams_distribute_parallel_for:
6971   case OMPD_target_teams_distribute_parallel_for_simd: {
6972     Expr *ThreadLimit = nullptr;
6973     Expr *NumThreads = nullptr;
6974     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6975       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6976       ThreadLimit = ThreadLimitClause->getThreadLimit();
6977       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6978         if (auto Constant =
6979                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6980           DefaultVal = Constant->getExtValue();
6981     }
6982     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6983       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6984       NumThreads = NumThreadsClause->getNumThreads();
6985       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6986         if (auto Constant =
6987                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6988           if (Constant->getExtValue() < DefaultVal) {
6989             DefaultVal = Constant->getExtValue();
6990             ThreadLimit = NumThreads;
6991           }
6992         }
6993       }
6994     }
6995     return ThreadLimit;
6996   }
6997   case OMPD_target_teams_distribute_simd:
6998   case OMPD_target_simd:
6999     DefaultVal = 1;
7000     return nullptr;
7001   case OMPD_parallel:
7002   case OMPD_for:
7003   case OMPD_parallel_for:
7004   case OMPD_parallel_master:
7005   case OMPD_parallel_sections:
7006   case OMPD_for_simd:
7007   case OMPD_parallel_for_simd:
7008   case OMPD_cancel:
7009   case OMPD_cancellation_point:
7010   case OMPD_ordered:
7011   case OMPD_threadprivate:
7012   case OMPD_allocate:
7013   case OMPD_task:
7014   case OMPD_simd:
7015   case OMPD_tile:
7016   case OMPD_unroll:
7017   case OMPD_sections:
7018   case OMPD_section:
7019   case OMPD_single:
7020   case OMPD_master:
7021   case OMPD_critical:
7022   case OMPD_taskyield:
7023   case OMPD_barrier:
7024   case OMPD_taskwait:
7025   case OMPD_taskgroup:
7026   case OMPD_atomic:
7027   case OMPD_flush:
7028   case OMPD_depobj:
7029   case OMPD_scan:
7030   case OMPD_teams:
7031   case OMPD_target_data:
7032   case OMPD_target_exit_data:
7033   case OMPD_target_enter_data:
7034   case OMPD_distribute:
7035   case OMPD_distribute_simd:
7036   case OMPD_distribute_parallel_for:
7037   case OMPD_distribute_parallel_for_simd:
7038   case OMPD_teams_distribute:
7039   case OMPD_teams_distribute_simd:
7040   case OMPD_teams_distribute_parallel_for:
7041   case OMPD_teams_distribute_parallel_for_simd:
7042   case OMPD_target_update:
7043   case OMPD_declare_simd:
7044   case OMPD_declare_variant:
7045   case OMPD_begin_declare_variant:
7046   case OMPD_end_declare_variant:
7047   case OMPD_declare_target:
7048   case OMPD_end_declare_target:
7049   case OMPD_declare_reduction:
7050   case OMPD_declare_mapper:
7051   case OMPD_taskloop:
7052   case OMPD_taskloop_simd:
7053   case OMPD_master_taskloop:
7054   case OMPD_master_taskloop_simd:
7055   case OMPD_parallel_master_taskloop:
7056   case OMPD_parallel_master_taskloop_simd:
7057   case OMPD_requires:
7058   case OMPD_unknown:
7059     break;
7060   default:
7061     break;
7062   }
7063   llvm_unreachable("Unsupported directive kind.");
7064 }
7065 
7066 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
7067     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
7068   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7069          "Clauses associated with the teams directive expected to be emitted "
7070          "only for the host!");
7071   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7072   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7073          "Expected target-based executable directive.");
7074   CGBuilderTy &Bld = CGF.Builder;
7075   llvm::Value *ThreadLimitVal = nullptr;
7076   llvm::Value *NumThreadsVal = nullptr;
7077   switch (DirectiveKind) {
7078   case OMPD_target: {
7079     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7080     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7081       return NumThreads;
7082     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7083         CGF.getContext(), CS->getCapturedStmt());
7084     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7085       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7086         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7087         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7088         const auto *ThreadLimitClause =
7089             Dir->getSingleClause<OMPThreadLimitClause>();
7090         CodeGenFunction::LexicalScope Scope(
7091             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7092         if (const auto *PreInit =
7093                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7094           for (const auto *I : PreInit->decls()) {
7095             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7096               CGF.EmitVarDecl(cast<VarDecl>(*I));
7097             } else {
7098               CodeGenFunction::AutoVarEmission Emission =
7099                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7100               CGF.EmitAutoVarCleanups(Emission);
7101             }
7102           }
7103         }
7104         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7105             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7106         ThreadLimitVal =
7107             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7108       }
7109       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7110           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7111         CS = Dir->getInnermostCapturedStmt();
7112         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7113             CGF.getContext(), CS->getCapturedStmt());
7114         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7115       }
7116       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7117           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7118         CS = Dir->getInnermostCapturedStmt();
7119         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7120           return NumThreads;
7121       }
7122       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7123         return Bld.getInt32(1);
7124     }
7125     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7126   }
7127   case OMPD_target_teams: {
7128     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7129       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7130       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7131       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7132           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7133       ThreadLimitVal =
7134           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7135     }
7136     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7137     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7138       return NumThreads;
7139     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7140         CGF.getContext(), CS->getCapturedStmt());
7141     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7142       if (Dir->getDirectiveKind() == OMPD_distribute) {
7143         CS = Dir->getInnermostCapturedStmt();
7144         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7145           return NumThreads;
7146       }
7147     }
7148     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7149   }
7150   case OMPD_target_teams_distribute:
7151     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7152       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7153       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7154       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7155           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7156       ThreadLimitVal =
7157           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7158     }
7159     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7160   case OMPD_target_parallel:
7161   case OMPD_target_parallel_for:
7162   case OMPD_target_parallel_for_simd:
7163   case OMPD_target_teams_distribute_parallel_for:
7164   case OMPD_target_teams_distribute_parallel_for_simd: {
7165     llvm::Value *CondVal = nullptr;
7166     // Handle if clause. If if clause present, the number of threads is
7167     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7168     if (D.hasClausesOfKind<OMPIfClause>()) {
7169       const OMPIfClause *IfClause = nullptr;
7170       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7171         if (C->getNameModifier() == OMPD_unknown ||
7172             C->getNameModifier() == OMPD_parallel) {
7173           IfClause = C;
7174           break;
7175         }
7176       }
7177       if (IfClause) {
7178         const Expr *Cond = IfClause->getCondition();
7179         bool Result;
7180         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7181           if (!Result)
7182             return Bld.getInt32(1);
7183         } else {
7184           CodeGenFunction::RunCleanupsScope Scope(CGF);
7185           CondVal = CGF.EvaluateExprAsBool(Cond);
7186         }
7187       }
7188     }
7189     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7190       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7191       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7192       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7193           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7194       ThreadLimitVal =
7195           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7196     }
7197     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7198       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7199       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7200       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7201           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7202       NumThreadsVal =
7203           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7204       ThreadLimitVal = ThreadLimitVal
7205                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7206                                                                 ThreadLimitVal),
7207                                               NumThreadsVal, ThreadLimitVal)
7208                            : NumThreadsVal;
7209     }
7210     if (!ThreadLimitVal)
7211       ThreadLimitVal = Bld.getInt32(0);
7212     if (CondVal)
7213       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7214     return ThreadLimitVal;
7215   }
7216   case OMPD_target_teams_distribute_simd:
7217   case OMPD_target_simd:
7218     return Bld.getInt32(1);
7219   case OMPD_parallel:
7220   case OMPD_for:
7221   case OMPD_parallel_for:
7222   case OMPD_parallel_master:
7223   case OMPD_parallel_sections:
7224   case OMPD_for_simd:
7225   case OMPD_parallel_for_simd:
7226   case OMPD_cancel:
7227   case OMPD_cancellation_point:
7228   case OMPD_ordered:
7229   case OMPD_threadprivate:
7230   case OMPD_allocate:
7231   case OMPD_task:
7232   case OMPD_simd:
7233   case OMPD_tile:
7234   case OMPD_unroll:
7235   case OMPD_sections:
7236   case OMPD_section:
7237   case OMPD_single:
7238   case OMPD_master:
7239   case OMPD_critical:
7240   case OMPD_taskyield:
7241   case OMPD_barrier:
7242   case OMPD_taskwait:
7243   case OMPD_taskgroup:
7244   case OMPD_atomic:
7245   case OMPD_flush:
7246   case OMPD_depobj:
7247   case OMPD_scan:
7248   case OMPD_teams:
7249   case OMPD_target_data:
7250   case OMPD_target_exit_data:
7251   case OMPD_target_enter_data:
7252   case OMPD_distribute:
7253   case OMPD_distribute_simd:
7254   case OMPD_distribute_parallel_for:
7255   case OMPD_distribute_parallel_for_simd:
7256   case OMPD_teams_distribute:
7257   case OMPD_teams_distribute_simd:
7258   case OMPD_teams_distribute_parallel_for:
7259   case OMPD_teams_distribute_parallel_for_simd:
7260   case OMPD_target_update:
7261   case OMPD_declare_simd:
7262   case OMPD_declare_variant:
7263   case OMPD_begin_declare_variant:
7264   case OMPD_end_declare_variant:
7265   case OMPD_declare_target:
7266   case OMPD_end_declare_target:
7267   case OMPD_declare_reduction:
7268   case OMPD_declare_mapper:
7269   case OMPD_taskloop:
7270   case OMPD_taskloop_simd:
7271   case OMPD_master_taskloop:
7272   case OMPD_master_taskloop_simd:
7273   case OMPD_parallel_master_taskloop:
7274   case OMPD_parallel_master_taskloop_simd:
7275   case OMPD_requires:
7276   case OMPD_metadirective:
7277   case OMPD_unknown:
7278     break;
7279   default:
7280     break;
7281   }
7282   llvm_unreachable("Unsupported directive kind.");
7283 }
7284 
7285 namespace {
7286 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7287 
7288 // Utility to handle information from clauses associated with a given
7289 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7290 // It provides a convenient interface to obtain the information and generate
7291 // code for that information.
7292 class MappableExprsHandler {
7293 public:
7294   /// Values for bit flags used to specify the mapping type for
7295   /// offloading.
7296   enum OpenMPOffloadMappingFlags : uint64_t {
7297     /// No flags
7298     OMP_MAP_NONE = 0x0,
7299     /// Allocate memory on the device and move data from host to device.
7300     OMP_MAP_TO = 0x01,
7301     /// Allocate memory on the device and move data from device to host.
7302     OMP_MAP_FROM = 0x02,
7303     /// Always perform the requested mapping action on the element, even
7304     /// if it was already mapped before.
7305     OMP_MAP_ALWAYS = 0x04,
7306     /// Delete the element from the device environment, ignoring the
7307     /// current reference count associated with the element.
7308     OMP_MAP_DELETE = 0x08,
7309     /// The element being mapped is a pointer-pointee pair; both the
7310     /// pointer and the pointee should be mapped.
7311     OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
7313     /// passed to the target kernel as an argument.
7314     OMP_MAP_TARGET_PARAM = 0x20,
7315     /// Signal that the runtime library has to return the device pointer
7316     /// in the current position for the data being mapped. Used when we have the
7317     /// use_device_ptr or use_device_addr clause.
7318     OMP_MAP_RETURN_PARAM = 0x40,
7319     /// This flag signals that the reference being passed is a pointer to
7320     /// private data.
7321     OMP_MAP_PRIVATE = 0x80,
7322     /// Pass the element to the device by value.
7323     OMP_MAP_LITERAL = 0x100,
7324     /// Implicit map
7325     OMP_MAP_IMPLICIT = 0x200,
7326     /// Close is a hint to the runtime to allocate memory close to
7327     /// the target device.
7328     OMP_MAP_CLOSE = 0x400,
7329     /// 0x800 is reserved for compatibility with XLC.
7330     /// Produce a runtime error if the data is not already allocated.
7331     OMP_MAP_PRESENT = 0x1000,
7332     // Increment and decrement a separate reference counter so that the data
7333     // cannot be unmapped within the associated region.  Thus, this flag is
7334     // intended to be used on 'target' and 'target data' directives because they
7335     // are inherently structured.  It is not intended to be used on 'target
7336     // enter data' and 'target exit data' directives because they are inherently
7337     // dynamic.
7338     // This is an OpenMP extension for the sake of OpenACC support.
7339     OMP_MAP_OMPX_HOLD = 0x2000,
7340     /// Signal that the runtime library should use args as an array of
7341     /// descriptor_dim pointers and use args_size as dims. Used when we have
7342     /// non-contiguous list items in target update directive
7343     OMP_MAP_NON_CONTIG = 0x100000000000,
7344     /// The 16 MSBs of the flags indicate whether the entry is member of some
7345     /// struct/class.
7346     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7347     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7348   };
7349 
7350   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7351   static unsigned getFlagMemberOffset() {
7352     unsigned Offset = 0;
7353     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7354          Remain = Remain >> 1)
7355       Offset++;
7356     return Offset;
7357   }
7358 
7359   /// Class that holds debugging information for a data mapping to be passed to
7360   /// the runtime library.
7361   class MappingExprInfo {
7362     /// The variable declaration used for the data mapping.
7363     const ValueDecl *MapDecl = nullptr;
7364     /// The original expression used in the map clause, or null if there is
7365     /// none.
7366     const Expr *MapExpr = nullptr;
7367 
7368   public:
7369     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7370         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7371 
7372     const ValueDecl *getMapDecl() const { return MapDecl; }
7373     const Expr *getMapExpr() const { return MapExpr; }
7374   };
7375 
7376   /// Class that associates information with a base pointer to be passed to the
7377   /// runtime library.
7378   class BasePointerInfo {
7379     /// The base pointer.
7380     llvm::Value *Ptr = nullptr;
7381     /// The base declaration that refers to this device pointer, or null if
7382     /// there is none.
7383     const ValueDecl *DevPtrDecl = nullptr;
7384 
7385   public:
7386     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7387         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7388     llvm::Value *operator*() const { return Ptr; }
7389     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7390     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7391   };
7392 
7393   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7394   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7395   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7396   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7397   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7398   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7399   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7400 
7401   /// This structure contains combined information generated for mappable
7402   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7403   /// mappers, and non-contiguous information.
7404   struct MapCombinedInfoTy {
7405     struct StructNonContiguousInfo {
7406       bool IsNonContiguous = false;
7407       MapDimArrayTy Dims;
7408       MapNonContiguousArrayTy Offsets;
7409       MapNonContiguousArrayTy Counts;
7410       MapNonContiguousArrayTy Strides;
7411     };
7412     MapExprsArrayTy Exprs;
7413     MapBaseValuesArrayTy BasePointers;
7414     MapValuesArrayTy Pointers;
7415     MapValuesArrayTy Sizes;
7416     MapFlagsArrayTy Types;
7417     MapMappersArrayTy Mappers;
7418     StructNonContiguousInfo NonContigInfo;
7419 
7420     /// Append arrays in \a CurInfo.
7421     void append(MapCombinedInfoTy &CurInfo) {
7422       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7423       BasePointers.append(CurInfo.BasePointers.begin(),
7424                           CurInfo.BasePointers.end());
7425       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7426       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7427       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7428       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7429       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7430                                  CurInfo.NonContigInfo.Dims.end());
7431       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7432                                     CurInfo.NonContigInfo.Offsets.end());
7433       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7434                                    CurInfo.NonContigInfo.Counts.end());
7435       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7436                                     CurInfo.NonContigInfo.Strides.end());
7437     }
7438   };
7439 
7440   /// Map between a struct and the its lowest & highest elements which have been
7441   /// mapped.
7442   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7443   ///                    HE(FieldIndex, Pointer)}
7444   struct StructRangeInfoTy {
7445     MapCombinedInfoTy PreliminaryMapData;
7446     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7447         0, Address::invalid()};
7448     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7449         0, Address::invalid()};
7450     Address Base = Address::invalid();
7451     Address LB = Address::invalid();
7452     bool IsArraySection = false;
7453     bool HasCompleteRecord = false;
7454   };
7455 
7456 private:
7457   /// Kind that defines how a device pointer has to be returned.
7458   struct MapInfo {
7459     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7460     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7461     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7462     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7463     bool ReturnDevicePointer = false;
7464     bool IsImplicit = false;
7465     const ValueDecl *Mapper = nullptr;
7466     const Expr *VarRef = nullptr;
7467     bool ForDeviceAddr = false;
7468 
7469     MapInfo() = default;
7470     MapInfo(
7471         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7472         OpenMPMapClauseKind MapType,
7473         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7474         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7475         bool ReturnDevicePointer, bool IsImplicit,
7476         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7477         bool ForDeviceAddr = false)
7478         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7479           MotionModifiers(MotionModifiers),
7480           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7481           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7482   };
7483 
7484   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7485   /// member and there is no map information about it, then emission of that
7486   /// entry is deferred until the whole struct has been processed.
7487   struct DeferredDevicePtrEntryTy {
7488     const Expr *IE = nullptr;
7489     const ValueDecl *VD = nullptr;
7490     bool ForDeviceAddr = false;
7491 
7492     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7493                              bool ForDeviceAddr)
7494         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7495   };
7496 
7497   /// The target directive from where the mappable clauses were extracted. It
7498   /// is either an executable directive or a user-defined mapper directive.
7499   llvm::PointerUnion<const OMPExecutableDirective *,
7500                      const OMPDeclareMapperDecl *>
7501       CurDir;
7502 
7503   /// Function the directive is being generated for.
7504   CodeGenFunction &CGF;
7505 
7506   /// Set of all first private variables in the current directive.
7507   /// bool data is set to true if the variable is implicitly marked as
7508   /// firstprivate, false otherwise.
7509   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7510 
7511   /// Map between device pointer declarations and their expression components.
7512   /// The key value for declarations in 'this' is null.
7513   llvm::DenseMap<
7514       const ValueDecl *,
7515       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7516       DevPointersMap;
7517 
7518   /// Map between lambda declarations and their map type.
7519   llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7520 
7521   llvm::Value *getExprTypeSize(const Expr *E) const {
7522     QualType ExprTy = E->getType().getCanonicalType();
7523 
7524     // Calculate the size for array shaping expression.
7525     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7526       llvm::Value *Size =
7527           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7528       for (const Expr *SE : OAE->getDimensions()) {
7529         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7530         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7531                                       CGF.getContext().getSizeType(),
7532                                       SE->getExprLoc());
7533         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7534       }
7535       return Size;
7536     }
7537 
7538     // Reference types are ignored for mapping purposes.
7539     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7540       ExprTy = RefTy->getPointeeType().getCanonicalType();
7541 
7542     // Given that an array section is considered a built-in type, we need to
7543     // do the calculation based on the length of the section instead of relying
7544     // on CGF.getTypeSize(E->getType()).
7545     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7546       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7547                             OAE->getBase()->IgnoreParenImpCasts())
7548                             .getCanonicalType();
7549 
7550       // If there is no length associated with the expression and lower bound is
7551       // not specified too, that means we are using the whole length of the
7552       // base.
7553       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7554           !OAE->getLowerBound())
7555         return CGF.getTypeSize(BaseTy);
7556 
7557       llvm::Value *ElemSize;
7558       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7559         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7560       } else {
7561         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7562         assert(ATy && "Expecting array type if not a pointer type.");
7563         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7564       }
7565 
7566       // If we don't have a length at this point, that is because we have an
7567       // array section with a single element.
7568       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7569         return ElemSize;
7570 
7571       if (const Expr *LenExpr = OAE->getLength()) {
7572         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7573         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7574                                              CGF.getContext().getSizeType(),
7575                                              LenExpr->getExprLoc());
7576         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7577       }
7578       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7579              OAE->getLowerBound() && "expected array_section[lb:].");
7580       // Size = sizetype - lb * elemtype;
7581       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7582       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7583       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7584                                        CGF.getContext().getSizeType(),
7585                                        OAE->getLowerBound()->getExprLoc());
7586       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7587       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7588       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7589       LengthVal = CGF.Builder.CreateSelect(
7590           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7591       return LengthVal;
7592     }
7593     return CGF.getTypeSize(ExprTy);
7594   }
7595 
7596   /// Return the corresponding bits for a given map clause modifier. Add
7597   /// a flag marking the map as a pointer if requested. Add a flag marking the
7598   /// map as the first one of a series of maps that relate to the same map
7599   /// expression.
7600   OpenMPOffloadMappingFlags getMapTypeBits(
7601       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7602       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7603       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7604     OpenMPOffloadMappingFlags Bits =
7605         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7606     switch (MapType) {
7607     case OMPC_MAP_alloc:
7608     case OMPC_MAP_release:
7609       // alloc and release is the default behavior in the runtime library,  i.e.
7610       // if we don't pass any bits alloc/release that is what the runtime is
7611       // going to do. Therefore, we don't need to signal anything for these two
7612       // type modifiers.
7613       break;
7614     case OMPC_MAP_to:
7615       Bits |= OMP_MAP_TO;
7616       break;
7617     case OMPC_MAP_from:
7618       Bits |= OMP_MAP_FROM;
7619       break;
7620     case OMPC_MAP_tofrom:
7621       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7622       break;
7623     case OMPC_MAP_delete:
7624       Bits |= OMP_MAP_DELETE;
7625       break;
7626     case OMPC_MAP_unknown:
7627       llvm_unreachable("Unexpected map type!");
7628     }
7629     if (AddPtrFlag)
7630       Bits |= OMP_MAP_PTR_AND_OBJ;
7631     if (AddIsTargetParamFlag)
7632       Bits |= OMP_MAP_TARGET_PARAM;
7633     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7634       Bits |= OMP_MAP_ALWAYS;
7635     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7636       Bits |= OMP_MAP_CLOSE;
7637     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7638         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7639       Bits |= OMP_MAP_PRESENT;
7640     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7641       Bits |= OMP_MAP_OMPX_HOLD;
7642     if (IsNonContiguous)
7643       Bits |= OMP_MAP_NON_CONTIG;
7644     return Bits;
7645   }
7646 
7647   /// Return true if the provided expression is a final array section. A
7648   /// final array section, is one whose length can't be proved to be one.
7649   bool isFinalArraySectionExpression(const Expr *E) const {
7650     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7651 
7652     // It is not an array section and therefore not a unity-size one.
7653     if (!OASE)
7654       return false;
7655 
7656     // An array section with no colon always refer to a single element.
7657     if (OASE->getColonLocFirst().isInvalid())
7658       return false;
7659 
7660     const Expr *Length = OASE->getLength();
7661 
7662     // If we don't have a length we have to check if the array has size 1
7663     // for this dimension. Also, we should always expect a length if the
7664     // base type is pointer.
7665     if (!Length) {
7666       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7667                              OASE->getBase()->IgnoreParenImpCasts())
7668                              .getCanonicalType();
7669       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7670         return ATy->getSize().getSExtValue() != 1;
7671       // If we don't have a constant dimension length, we have to consider
7672       // the current section as having any size, so it is not necessarily
7673       // unitary. If it happen to be unity size, that's user fault.
7674       return true;
7675     }
7676 
7677     // Check if the length evaluates to 1.
7678     Expr::EvalResult Result;
7679     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7680       return true; // Can have more that size 1.
7681 
7682     llvm::APSInt ConstLength = Result.Val.getInt();
7683     return ConstLength.getSExtValue() != 1;
7684   }
7685 
7686   /// Generate the base pointers, section pointers, sizes, map type bits, and
7687   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7688   /// map type, map or motion modifiers, and expression components.
7689   /// \a IsFirstComponent should be set to true if the provided set of
7690   /// components is the first associated with a capture.
7691   void generateInfoForComponentList(
7692       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7693       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7694       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7695       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7696       bool IsFirstComponentList, bool IsImplicit,
7697       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7698       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7699       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7700           OverlappedElements = llvm::None) const {
7701     // The following summarizes what has to be generated for each map and the
7702     // types below. The generated information is expressed in this order:
7703     // base pointer, section pointer, size, flags
7704     // (to add to the ones that come from the map type and modifier).
7705     //
7706     // double d;
7707     // int i[100];
7708     // float *p;
7709     //
7710     // struct S1 {
7711     //   int i;
7712     //   float f[50];
7713     // }
7714     // struct S2 {
7715     //   int i;
7716     //   float f[50];
7717     //   S1 s;
7718     //   double *p;
7719     //   struct S2 *ps;
7720     //   int &ref;
7721     // }
7722     // S2 s;
7723     // S2 *ps;
7724     //
7725     // map(d)
7726     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7727     //
7728     // map(i)
7729     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7730     //
7731     // map(i[1:23])
7732     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7733     //
7734     // map(p)
7735     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7736     //
7737     // map(p[1:24])
7738     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7739     // in unified shared memory mode or for local pointers
7740     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7741     //
7742     // map(s)
7743     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7744     //
7745     // map(s.i)
7746     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7747     //
7748     // map(s.s.f)
7749     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7750     //
7751     // map(s.p)
7752     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7753     //
7754     // map(to: s.p[:22])
7755     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7756     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7757     // &(s.p), &(s.p[0]), 22*sizeof(double),
7758     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7759     // (*) alloc space for struct members, only this is a target parameter
7760     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7761     //      optimizes this entry out, same in the examples below)
7762     // (***) map the pointee (map: to)
7763     //
7764     // map(to: s.ref)
7765     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7766     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7767     // (*) alloc space for struct members, only this is a target parameter
7768     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7769     //      optimizes this entry out, same in the examples below)
7770     // (***) map the pointee (map: to)
7771     //
7772     // map(s.ps)
7773     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7774     //
7775     // map(from: s.ps->s.i)
7776     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7777     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7778     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7779     //
7780     // map(to: s.ps->ps)
7781     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7782     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7783     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7784     //
7785     // map(s.ps->ps->ps)
7786     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7787     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7788     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7789     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7790     //
7791     // map(to: s.ps->ps->s.f[:22])
7792     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7793     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7794     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7795     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7796     //
7797     // map(ps)
7798     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7799     //
7800     // map(ps->i)
7801     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7802     //
7803     // map(ps->s.f)
7804     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7805     //
7806     // map(from: ps->p)
7807     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7808     //
7809     // map(to: ps->p[:22])
7810     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7811     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7812     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7813     //
7814     // map(ps->ps)
7815     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7816     //
7817     // map(from: ps->ps->s.i)
7818     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7819     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7820     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7821     //
7822     // map(from: ps->ps->ps)
7823     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7824     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7825     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7826     //
7827     // map(ps->ps->ps->ps)
7828     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7829     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7830     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7831     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7832     //
7833     // map(to: ps->ps->ps->s.f[:22])
7834     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7835     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7836     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7837     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7838     //
7839     // map(to: s.f[:22]) map(from: s.p[:33])
7840     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7841     //     sizeof(double*) (*), TARGET_PARAM
7842     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7843     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7844     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7845     // (*) allocate contiguous space needed to fit all mapped members even if
7846     //     we allocate space for members not mapped (in this example,
7847     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7848     //     them as well because they fall between &s.f[0] and &s.p)
7849     //
7850     // map(from: s.f[:22]) map(to: ps->p[:33])
7851     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7852     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7853     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7854     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7855     // (*) the struct this entry pertains to is the 2nd element in the list of
7856     //     arguments, hence MEMBER_OF(2)
7857     //
7858     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7859     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7860     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7861     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7862     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7863     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7864     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7865     // (*) the struct this entry pertains to is the 4th element in the list
7866     //     of arguments, hence MEMBER_OF(4)
7867 
7868     // Track if the map information being generated is the first for a capture.
7869     bool IsCaptureFirstInfo = IsFirstComponentList;
7870     // When the variable is on a declare target link or in a to clause with
7871     // unified memory, a reference is needed to hold the host/device address
7872     // of the variable.
7873     bool RequiresReference = false;
7874 
7875     // Scan the components from the base to the complete expression.
7876     auto CI = Components.rbegin();
7877     auto CE = Components.rend();
7878     auto I = CI;
7879 
7880     // Track if the map information being generated is the first for a list of
7881     // components.
7882     bool IsExpressionFirstInfo = true;
7883     bool FirstPointerInComplexData = false;
7884     Address BP = Address::invalid();
7885     const Expr *AssocExpr = I->getAssociatedExpression();
7886     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7887     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7888     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7889 
7890     if (isa<MemberExpr>(AssocExpr)) {
7891       // The base is the 'this' pointer. The content of the pointer is going
7892       // to be the base of the field being mapped.
7893       BP = CGF.LoadCXXThisAddress();
7894     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7895                (OASE &&
7896                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7897       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7898     } else if (OAShE &&
7899                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7900       BP = Address(
7901           CGF.EmitScalarExpr(OAShE->getBase()),
7902           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7903     } else {
7904       // The base is the reference to the variable.
7905       // BP = &Var.
7906       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7907       if (const auto *VD =
7908               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7909         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7910                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7911           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7912               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7913                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7914             RequiresReference = true;
7915             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7916           }
7917         }
7918       }
7919 
7920       // If the variable is a pointer and is being dereferenced (i.e. is not
7921       // the last component), the base has to be the pointer itself, not its
7922       // reference. References are ignored for mapping purposes.
7923       QualType Ty =
7924           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7925       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7926         // No need to generate individual map information for the pointer, it
7927         // can be associated with the combined storage if shared memory mode is
7928         // active or the base declaration is not global variable.
7929         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7930         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7931             !VD || VD->hasLocalStorage())
7932           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7933         else
7934           FirstPointerInComplexData = true;
7935         ++I;
7936       }
7937     }
7938 
7939     // Track whether a component of the list should be marked as MEMBER_OF some
7940     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7941     // in a component list should be marked as MEMBER_OF, all subsequent entries
7942     // do not belong to the base struct. E.g.
7943     // struct S2 s;
7944     // s.ps->ps->ps->f[:]
7945     //   (1) (2) (3) (4)
7946     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7947     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7948     // is the pointee of ps(2) which is not member of struct s, so it should not
7949     // be marked as such (it is still PTR_AND_OBJ).
7950     // The variable is initialized to false so that PTR_AND_OBJ entries which
7951     // are not struct members are not considered (e.g. array of pointers to
7952     // data).
7953     bool ShouldBeMemberOf = false;
7954 
7955     // Variable keeping track of whether or not we have encountered a component
7956     // in the component list which is a member expression. Useful when we have a
7957     // pointer or a final array section, in which case it is the previous
7958     // component in the list which tells us whether we have a member expression.
7959     // E.g. X.f[:]
7960     // While processing the final array section "[:]" it is "f" which tells us
7961     // whether we are dealing with a member of a declared struct.
7962     const MemberExpr *EncounteredME = nullptr;
7963 
7964     // Track for the total number of dimension. Start from one for the dummy
7965     // dimension.
7966     uint64_t DimSize = 1;
7967 
7968     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7969     bool IsPrevMemberReference = false;
7970 
7971     for (; I != CE; ++I) {
7972       // If the current component is member of a struct (parent struct) mark it.
7973       if (!EncounteredME) {
7974         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7975         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7976         // as MEMBER_OF the parent struct.
7977         if (EncounteredME) {
7978           ShouldBeMemberOf = true;
7979           // Do not emit as complex pointer if this is actually not array-like
7980           // expression.
7981           if (FirstPointerInComplexData) {
7982             QualType Ty = std::prev(I)
7983                               ->getAssociatedDeclaration()
7984                               ->getType()
7985                               .getNonReferenceType();
7986             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7987             FirstPointerInComplexData = false;
7988           }
7989         }
7990       }
7991 
7992       auto Next = std::next(I);
7993 
7994       // We need to generate the addresses and sizes if this is the last
7995       // component, if the component is a pointer or if it is an array section
7996       // whose length can't be proved to be one. If this is a pointer, it
7997       // becomes the base address for the following components.
7998 
7999       // A final array section, is one whose length can't be proved to be one.
8000       // If the map item is non-contiguous then we don't treat any array section
8001       // as final array section.
8002       bool IsFinalArraySection =
8003           !IsNonContiguous &&
8004           isFinalArraySectionExpression(I->getAssociatedExpression());
8005 
8006       // If we have a declaration for the mapping use that, otherwise use
8007       // the base declaration of the map clause.
8008       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8009                                      ? I->getAssociatedDeclaration()
8010                                      : BaseDecl;
8011       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8012                                                : MapExpr;
8013 
8014       // Get information on whether the element is a pointer. Have to do a
8015       // special treatment for array sections given that they are built-in
8016       // types.
8017       const auto *OASE =
8018           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8019       const auto *OAShE =
8020           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8021       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8022       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8023       bool IsPointer =
8024           OAShE ||
8025           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8026                        .getCanonicalType()
8027                        ->isAnyPointerType()) ||
8028           I->getAssociatedExpression()->getType()->isAnyPointerType();
8029       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8030                                MapDecl &&
8031                                MapDecl->getType()->isLValueReferenceType();
8032       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8033 
8034       if (OASE)
8035         ++DimSize;
8036 
8037       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8038           IsFinalArraySection) {
8039         // If this is not the last component, we expect the pointer to be
8040         // associated with an array expression or member expression.
8041         assert((Next == CE ||
8042                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8043                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8044                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8045                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8046                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8047                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8048                "Unexpected expression");
8049 
8050         Address LB = Address::invalid();
8051         Address LowestElem = Address::invalid();
8052         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8053                                        const MemberExpr *E) {
8054           const Expr *BaseExpr = E->getBase();
8055           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8056           // scalar.
8057           LValue BaseLV;
8058           if (E->isArrow()) {
8059             LValueBaseInfo BaseInfo;
8060             TBAAAccessInfo TBAAInfo;
8061             Address Addr =
8062                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8063             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8064             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8065           } else {
8066             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8067           }
8068           return BaseLV;
8069         };
8070         if (OAShE) {
8071           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8072                                     CGF.getContext().getTypeAlignInChars(
8073                                         OAShE->getBase()->getType()));
8074         } else if (IsMemberReference) {
8075           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8076           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8077           LowestElem = CGF.EmitLValueForFieldInitialization(
8078                               BaseLVal, cast<FieldDecl>(MapDecl))
8079                            .getAddress(CGF);
8080           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8081                    .getAddress(CGF);
8082         } else {
8083           LowestElem = LB =
8084               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8085                   .getAddress(CGF);
8086         }
8087 
8088         // If this component is a pointer inside the base struct then we don't
8089         // need to create any entry for it - it will be combined with the object
8090         // it is pointing to into a single PTR_AND_OBJ entry.
8091         bool IsMemberPointerOrAddr =
8092             EncounteredME &&
8093             (((IsPointer || ForDeviceAddr) &&
8094               I->getAssociatedExpression() == EncounteredME) ||
8095              (IsPrevMemberReference && !IsPointer) ||
8096              (IsMemberReference && Next != CE &&
8097               !Next->getAssociatedExpression()->getType()->isPointerType()));
8098         if (!OverlappedElements.empty() && Next == CE) {
8099           // Handle base element with the info for overlapped elements.
8100           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8101           assert(!IsPointer &&
8102                  "Unexpected base element with the pointer type.");
8103           // Mark the whole struct as the struct that requires allocation on the
8104           // device.
8105           PartialStruct.LowestElem = {0, LowestElem};
8106           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8107               I->getAssociatedExpression()->getType());
8108           Address HB = CGF.Builder.CreateConstGEP(
8109               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8110                                                               CGF.VoidPtrTy),
8111               TypeSize.getQuantity() - 1);
8112           PartialStruct.HighestElem = {
8113               std::numeric_limits<decltype(
8114                   PartialStruct.HighestElem.first)>::max(),
8115               HB};
8116           PartialStruct.Base = BP;
8117           PartialStruct.LB = LB;
8118           assert(
8119               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8120               "Overlapped elements must be used only once for the variable.");
8121           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8122           // Emit data for non-overlapped data.
8123           OpenMPOffloadMappingFlags Flags =
8124               OMP_MAP_MEMBER_OF |
8125               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8126                              /*AddPtrFlag=*/false,
8127                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8128           llvm::Value *Size = nullptr;
8129           // Do bitcopy of all non-overlapped structure elements.
8130           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8131                    Component : OverlappedElements) {
8132             Address ComponentLB = Address::invalid();
8133             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8134                  Component) {
8135               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8136                 const auto *FD = dyn_cast<FieldDecl>(VD);
8137                 if (FD && FD->getType()->isLValueReferenceType()) {
8138                   const auto *ME =
8139                       cast<MemberExpr>(MC.getAssociatedExpression());
8140                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8141                   ComponentLB =
8142                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8143                           .getAddress(CGF);
8144                 } else {
8145                   ComponentLB =
8146                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8147                           .getAddress(CGF);
8148                 }
8149                 Size = CGF.Builder.CreatePtrDiff(
8150                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8151                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8152                 break;
8153               }
8154             }
8155             assert(Size && "Failed to determine structure size");
8156             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8157             CombinedInfo.BasePointers.push_back(BP.getPointer());
8158             CombinedInfo.Pointers.push_back(LB.getPointer());
8159             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8160                 Size, CGF.Int64Ty, /*isSigned=*/true));
8161             CombinedInfo.Types.push_back(Flags);
8162             CombinedInfo.Mappers.push_back(nullptr);
8163             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8164                                                                       : 1);
8165             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8166           }
8167           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8168           CombinedInfo.BasePointers.push_back(BP.getPointer());
8169           CombinedInfo.Pointers.push_back(LB.getPointer());
8170           Size = CGF.Builder.CreatePtrDiff(
8171               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8172               CGF.EmitCastToVoidPtr(LB.getPointer()));
8173           CombinedInfo.Sizes.push_back(
8174               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8175           CombinedInfo.Types.push_back(Flags);
8176           CombinedInfo.Mappers.push_back(nullptr);
8177           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8178                                                                     : 1);
8179           break;
8180         }
8181         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8182         if (!IsMemberPointerOrAddr ||
8183             (Next == CE && MapType != OMPC_MAP_unknown)) {
8184           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8185           CombinedInfo.BasePointers.push_back(BP.getPointer());
8186           CombinedInfo.Pointers.push_back(LB.getPointer());
8187           CombinedInfo.Sizes.push_back(
8188               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8189           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8190                                                                     : 1);
8191 
8192           // If Mapper is valid, the last component inherits the mapper.
8193           bool HasMapper = Mapper && Next == CE;
8194           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8195 
8196           // We need to add a pointer flag for each map that comes from the
8197           // same expression except for the first one. We also need to signal
8198           // this map is the first one that relates with the current capture
8199           // (there is a set of entries for each capture).
8200           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8201               MapType, MapModifiers, MotionModifiers, IsImplicit,
8202               !IsExpressionFirstInfo || RequiresReference ||
8203                   FirstPointerInComplexData || IsMemberReference,
8204               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8205 
8206           if (!IsExpressionFirstInfo || IsMemberReference) {
8207             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8208             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8209             if (IsPointer || (IsMemberReference && Next != CE))
8210               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8211                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8212 
8213             if (ShouldBeMemberOf) {
8214               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8215               // should be later updated with the correct value of MEMBER_OF.
8216               Flags |= OMP_MAP_MEMBER_OF;
8217               // From now on, all subsequent PTR_AND_OBJ entries should not be
8218               // marked as MEMBER_OF.
8219               ShouldBeMemberOf = false;
8220             }
8221           }
8222 
8223           CombinedInfo.Types.push_back(Flags);
8224         }
8225 
8226         // If we have encountered a member expression so far, keep track of the
8227         // mapped member. If the parent is "*this", then the value declaration
8228         // is nullptr.
8229         if (EncounteredME) {
8230           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8231           unsigned FieldIndex = FD->getFieldIndex();
8232 
8233           // Update info about the lowest and highest elements for this struct
8234           if (!PartialStruct.Base.isValid()) {
8235             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8236             if (IsFinalArraySection) {
8237               Address HB =
8238                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8239                       .getAddress(CGF);
8240               PartialStruct.HighestElem = {FieldIndex, HB};
8241             } else {
8242               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8243             }
8244             PartialStruct.Base = BP;
8245             PartialStruct.LB = BP;
8246           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8247             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8248           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8249             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8250           }
8251         }
8252 
8253         // Need to emit combined struct for array sections.
8254         if (IsFinalArraySection || IsNonContiguous)
8255           PartialStruct.IsArraySection = true;
8256 
8257         // If we have a final array section, we are done with this expression.
8258         if (IsFinalArraySection)
8259           break;
8260 
8261         // The pointer becomes the base for the next element.
8262         if (Next != CE)
8263           BP = IsMemberReference ? LowestElem : LB;
8264 
8265         IsExpressionFirstInfo = false;
8266         IsCaptureFirstInfo = false;
8267         FirstPointerInComplexData = false;
8268         IsPrevMemberReference = IsMemberReference;
8269       } else if (FirstPointerInComplexData) {
8270         QualType Ty = Components.rbegin()
8271                           ->getAssociatedDeclaration()
8272                           ->getType()
8273                           .getNonReferenceType();
8274         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8275         FirstPointerInComplexData = false;
8276       }
8277     }
8278     // If ran into the whole component - allocate the space for the whole
8279     // record.
8280     if (!EncounteredME)
8281       PartialStruct.HasCompleteRecord = true;
8282 
8283     if (!IsNonContiguous)
8284       return;
8285 
8286     const ASTContext &Context = CGF.getContext();
8287 
8288     // For supporting stride in array section, we need to initialize the first
8289     // dimension size as 1, first offset as 0, and first count as 1
8290     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8291     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8292     MapValuesArrayTy CurStrides;
8293     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8294     uint64_t ElementTypeSize;
8295 
8296     // Collect Size information for each dimension and get the element size as
8297     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8298     // should be [10, 10] and the first stride is 4 btyes.
8299     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8300          Components) {
8301       const Expr *AssocExpr = Component.getAssociatedExpression();
8302       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8303 
8304       if (!OASE)
8305         continue;
8306 
8307       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8308       auto *CAT = Context.getAsConstantArrayType(Ty);
8309       auto *VAT = Context.getAsVariableArrayType(Ty);
8310 
8311       // We need all the dimension size except for the last dimension.
8312       assert((VAT || CAT || &Component == &*Components.begin()) &&
8313              "Should be either ConstantArray or VariableArray if not the "
8314              "first Component");
8315 
8316       // Get element size if CurStrides is empty.
8317       if (CurStrides.empty()) {
8318         const Type *ElementType = nullptr;
8319         if (CAT)
8320           ElementType = CAT->getElementType().getTypePtr();
8321         else if (VAT)
8322           ElementType = VAT->getElementType().getTypePtr();
8323         else
8324           assert(&Component == &*Components.begin() &&
8325                  "Only expect pointer (non CAT or VAT) when this is the "
8326                  "first Component");
8327         // If ElementType is null, then it means the base is a pointer
8328         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8329         // for next iteration.
8330         if (ElementType) {
8331           // For the case that having pointer as base, we need to remove one
8332           // level of indirection.
8333           if (&Component != &*Components.begin())
8334             ElementType = ElementType->getPointeeOrArrayElementType();
8335           ElementTypeSize =
8336               Context.getTypeSizeInChars(ElementType).getQuantity();
8337           CurStrides.push_back(
8338               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8339         }
8340       }
8341       // Get dimension value except for the last dimension since we don't need
8342       // it.
8343       if (DimSizes.size() < Components.size() - 1) {
8344         if (CAT)
8345           DimSizes.push_back(llvm::ConstantInt::get(
8346               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8347         else if (VAT)
8348           DimSizes.push_back(CGF.Builder.CreateIntCast(
8349               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8350               /*IsSigned=*/false));
8351       }
8352     }
8353 
8354     // Skip the dummy dimension since we have already have its information.
8355     auto DI = DimSizes.begin() + 1;
8356     // Product of dimension.
8357     llvm::Value *DimProd =
8358         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8359 
8360     // Collect info for non-contiguous. Notice that offset, count, and stride
8361     // are only meaningful for array-section, so we insert a null for anything
8362     // other than array-section.
8363     // Also, the size of offset, count, and stride are not the same as
8364     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8365     // count, and stride are the same as the number of non-contiguous
8366     // declaration in target update to/from clause.
8367     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8368          Components) {
8369       const Expr *AssocExpr = Component.getAssociatedExpression();
8370 
8371       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8372         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8373             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8374             /*isSigned=*/false);
8375         CurOffsets.push_back(Offset);
8376         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8377         CurStrides.push_back(CurStrides.back());
8378         continue;
8379       }
8380 
8381       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8382 
8383       if (!OASE)
8384         continue;
8385 
8386       // Offset
8387       const Expr *OffsetExpr = OASE->getLowerBound();
8388       llvm::Value *Offset = nullptr;
8389       if (!OffsetExpr) {
8390         // If offset is absent, then we just set it to zero.
8391         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8392       } else {
8393         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8394                                            CGF.Int64Ty,
8395                                            /*isSigned=*/false);
8396       }
8397       CurOffsets.push_back(Offset);
8398 
8399       // Count
8400       const Expr *CountExpr = OASE->getLength();
8401       llvm::Value *Count = nullptr;
8402       if (!CountExpr) {
8403         // In Clang, once a high dimension is an array section, we construct all
8404         // the lower dimension as array section, however, for case like
8405         // arr[0:2][2], Clang construct the inner dimension as an array section
8406         // but it actually is not in an array section form according to spec.
8407         if (!OASE->getColonLocFirst().isValid() &&
8408             !OASE->getColonLocSecond().isValid()) {
8409           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8410         } else {
8411           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8412           // When the length is absent it defaults to ⌈(size −
8413           // lower-bound)/stride⌉, where size is the size of the array
8414           // dimension.
8415           const Expr *StrideExpr = OASE->getStride();
8416           llvm::Value *Stride =
8417               StrideExpr
8418                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8419                                               CGF.Int64Ty, /*isSigned=*/false)
8420                   : nullptr;
8421           if (Stride)
8422             Count = CGF.Builder.CreateUDiv(
8423                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8424           else
8425             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8426         }
8427       } else {
8428         Count = CGF.EmitScalarExpr(CountExpr);
8429       }
8430       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8431       CurCounts.push_back(Count);
8432 
8433       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8434       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8435       //              Offset      Count     Stride
8436       //    D0          0           1         4    (int)    <- dummy dimension
8437       //    D1          0           2         8    (2 * (1) * 4)
8438       //    D2          1           2         20   (1 * (1 * 5) * 4)
8439       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8440       const Expr *StrideExpr = OASE->getStride();
8441       llvm::Value *Stride =
8442           StrideExpr
8443               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8444                                           CGF.Int64Ty, /*isSigned=*/false)
8445               : nullptr;
8446       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8447       if (Stride)
8448         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8449       else
8450         CurStrides.push_back(DimProd);
8451       if (DI != DimSizes.end())
8452         ++DI;
8453     }
8454 
8455     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8456     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8457     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8458   }
8459 
8460   /// Return the adjusted map modifiers if the declaration a capture refers to
8461   /// appears in a first-private clause. This is expected to be used only with
8462   /// directives that start with 'target'.
8463   MappableExprsHandler::OpenMPOffloadMappingFlags
8464   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8465     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8466 
8467     // A first private variable captured by reference will use only the
8468     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8469     // declaration is known as first-private in this handler.
8470     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8471       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8472         return MappableExprsHandler::OMP_MAP_TO |
8473                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8474       return MappableExprsHandler::OMP_MAP_PRIVATE |
8475              MappableExprsHandler::OMP_MAP_TO;
8476     }
8477     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8478     if (I != LambdasMap.end())
8479       // for map(to: lambda): using user specified map type.
8480       return getMapTypeBits(
8481           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8482           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8483           /*AddPtrFlag=*/false,
8484           /*AddIsTargetParamFlag=*/false,
8485           /*isNonContiguous=*/false);
8486     return MappableExprsHandler::OMP_MAP_TO |
8487            MappableExprsHandler::OMP_MAP_FROM;
8488   }
8489 
8490   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8491     // Rotate by getFlagMemberOffset() bits.
8492     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8493                                                   << getFlagMemberOffset());
8494   }
8495 
8496   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8497                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8498     // If the entry is PTR_AND_OBJ but has not been marked with the special
8499     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8500     // marked as MEMBER_OF.
8501     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8502         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8503       return;
8504 
8505     // Reset the placeholder value to prepare the flag for the assignment of the
8506     // proper MEMBER_OF value.
8507     Flags &= ~OMP_MAP_MEMBER_OF;
8508     Flags |= MemberOfFlag;
8509   }
8510 
8511   void getPlainLayout(const CXXRecordDecl *RD,
8512                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8513                       bool AsBase) const {
8514     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8515 
8516     llvm::StructType *St =
8517         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8518 
8519     unsigned NumElements = St->getNumElements();
8520     llvm::SmallVector<
8521         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8522         RecordLayout(NumElements);
8523 
8524     // Fill bases.
8525     for (const auto &I : RD->bases()) {
8526       if (I.isVirtual())
8527         continue;
8528       const auto *Base = I.getType()->getAsCXXRecordDecl();
8529       // Ignore empty bases.
8530       if (Base->isEmpty() || CGF.getContext()
8531                                  .getASTRecordLayout(Base)
8532                                  .getNonVirtualSize()
8533                                  .isZero())
8534         continue;
8535 
8536       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8537       RecordLayout[FieldIndex] = Base;
8538     }
8539     // Fill in virtual bases.
8540     for (const auto &I : RD->vbases()) {
8541       const auto *Base = I.getType()->getAsCXXRecordDecl();
8542       // Ignore empty bases.
8543       if (Base->isEmpty())
8544         continue;
8545       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8546       if (RecordLayout[FieldIndex])
8547         continue;
8548       RecordLayout[FieldIndex] = Base;
8549     }
8550     // Fill in all the fields.
8551     assert(!RD->isUnion() && "Unexpected union.");
8552     for (const auto *Field : RD->fields()) {
8553       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8554       // will fill in later.)
8555       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8556         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8557         RecordLayout[FieldIndex] = Field;
8558       }
8559     }
8560     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8561              &Data : RecordLayout) {
8562       if (Data.isNull())
8563         continue;
8564       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8565         getPlainLayout(Base, Layout, /*AsBase=*/true);
8566       else
8567         Layout.push_back(Data.get<const FieldDecl *>());
8568     }
8569   }
8570 
8571   /// Generate all the base pointers, section pointers, sizes, map types, and
8572   /// mappers for the extracted mappable expressions (all included in \a
8573   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8574   /// pair of the relevant declaration and index where it occurs is appended to
8575   /// the device pointers info array.
8576   void generateAllInfoForClauses(
8577       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8578       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8579           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8580     // We have to process the component lists that relate with the same
8581     // declaration in a single chunk so that we can generate the map flags
8582     // correctly. Therefore, we organize all lists in a map.
8583     enum MapKind { Present, Allocs, Other, Total };
8584     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8585                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8586         Info;
8587 
8588     // Helper function to fill the information map for the different supported
8589     // clauses.
8590     auto &&InfoGen =
8591         [&Info, &SkipVarSet](
8592             const ValueDecl *D, MapKind Kind,
8593             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8594             OpenMPMapClauseKind MapType,
8595             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8596             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8597             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8598             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8599           if (SkipVarSet.contains(D))
8600             return;
8601           auto It = Info.find(D);
8602           if (It == Info.end())
8603             It = Info
8604                      .insert(std::make_pair(
8605                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8606                      .first;
8607           It->second[Kind].emplace_back(
8608               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8609               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8610         };
8611 
8612     for (const auto *Cl : Clauses) {
8613       const auto *C = dyn_cast<OMPMapClause>(Cl);
8614       if (!C)
8615         continue;
8616       MapKind Kind = Other;
8617       if (llvm::is_contained(C->getMapTypeModifiers(),
8618                              OMPC_MAP_MODIFIER_present))
8619         Kind = Present;
8620       else if (C->getMapType() == OMPC_MAP_alloc)
8621         Kind = Allocs;
8622       const auto *EI = C->getVarRefs().begin();
8623       for (const auto L : C->component_lists()) {
8624         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8625         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8626                 C->getMapTypeModifiers(), llvm::None,
8627                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8628                 E);
8629         ++EI;
8630       }
8631     }
8632     for (const auto *Cl : Clauses) {
8633       const auto *C = dyn_cast<OMPToClause>(Cl);
8634       if (!C)
8635         continue;
8636       MapKind Kind = Other;
8637       if (llvm::is_contained(C->getMotionModifiers(),
8638                              OMPC_MOTION_MODIFIER_present))
8639         Kind = Present;
8640       const auto *EI = C->getVarRefs().begin();
8641       for (const auto L : C->component_lists()) {
8642         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8643                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8644                 C->isImplicit(), std::get<2>(L), *EI);
8645         ++EI;
8646       }
8647     }
8648     for (const auto *Cl : Clauses) {
8649       const auto *C = dyn_cast<OMPFromClause>(Cl);
8650       if (!C)
8651         continue;
8652       MapKind Kind = Other;
8653       if (llvm::is_contained(C->getMotionModifiers(),
8654                              OMPC_MOTION_MODIFIER_present))
8655         Kind = Present;
8656       const auto *EI = C->getVarRefs().begin();
8657       for (const auto L : C->component_lists()) {
8658         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8659                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8660                 C->isImplicit(), std::get<2>(L), *EI);
8661         ++EI;
8662       }
8663     }
8664 
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and a zero-size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
8671     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8672                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8673         DeferredInfo;
8674     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8675 
8676     for (const auto *Cl : Clauses) {
8677       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8678       if (!C)
8679         continue;
8680       for (const auto L : C->component_lists()) {
8681         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8682             std::get<1>(L);
8683         assert(!Components.empty() &&
8684                "Not expecting empty list of components!");
8685         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8686         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8687         const Expr *IE = Components.back().getAssociatedExpression();
8688         // If the first component is a member expression, we have to look into
8689         // 'this', which maps to null in the map of map information. Otherwise
8690         // look directly for the information.
8691         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8692 
8693         // We potentially have map information for this declaration already.
8694         // Look for the first set of components that refer to it.
8695         if (It != Info.end()) {
8696           bool Found = false;
8697           for (auto &Data : It->second) {
8698             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8699               return MI.Components.back().getAssociatedDeclaration() == VD;
8700             });
8701             // If we found a map entry, signal that the pointer has to be
8702             // returned and move on to the next declaration. Exclude cases where
8703             // the base pointer is mapped as array subscript, array section or
8704             // array shaping. The base address is passed as a pointer to base in
8705             // this case and cannot be used as a base for use_device_ptr list
8706             // item.
8707             if (CI != Data.end()) {
8708               auto PrevCI = std::next(CI->Components.rbegin());
8709               const auto *VarD = dyn_cast<VarDecl>(VD);
8710               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8711                   isa<MemberExpr>(IE) ||
8712                   !VD->getType().getNonReferenceType()->isPointerType() ||
8713                   PrevCI == CI->Components.rend() ||
8714                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8715                   VarD->hasLocalStorage()) {
8716                 CI->ReturnDevicePointer = true;
8717                 Found = true;
8718                 break;
8719               }
8720             }
8721           }
8722           if (Found)
8723             continue;
8724         }
8725 
8726         // We didn't find any match in our map information - generate a zero
8727         // size array section - if the pointer is a struct member we defer this
8728         // action until the whole struct has been processed.
8729         if (isa<MemberExpr>(IE)) {
8730           // Insert the pointer into Info to be processed by
8731           // generateInfoForComponentList. Because it is a member pointer
8732           // without a pointee, no entry will be generated for it, therefore
8733           // we need to generate one after the whole struct has been processed.
8734           // Nonetheless, generateInfoForComponentList must be called to take
8735           // the pointer into account for the calculation of the range of the
8736           // partial struct.
8737           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8738                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8739                   nullptr);
8740           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8741         } else {
8742           llvm::Value *Ptr =
8743               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8744           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8745           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8746           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8747           UseDevicePtrCombinedInfo.Sizes.push_back(
8748               llvm::Constant::getNullValue(CGF.Int64Ty));
8749           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8750           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8751         }
8752       }
8753     }
8754 
    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and a zero-size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
8761     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8762     for (const auto *Cl : Clauses) {
8763       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8764       if (!C)
8765         continue;
8766       for (const auto L : C->component_lists()) {
8767         assert(!std::get<1>(L).empty() &&
8768                "Not expecting empty list of components!");
8769         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8770         if (!Processed.insert(VD).second)
8771           continue;
8772         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8773         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8774         // If the first component is a member expression, we have to look into
8775         // 'this', which maps to null in the map of map information. Otherwise
8776         // look directly for the information.
8777         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8778 
8779         // We potentially have map information for this declaration already.
8780         // Look for the first set of components that refer to it.
8781         if (It != Info.end()) {
8782           bool Found = false;
8783           for (auto &Data : It->second) {
8784             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8785               return MI.Components.back().getAssociatedDeclaration() == VD;
8786             });
8787             // If we found a map entry, signal that the pointer has to be
8788             // returned and move on to the next declaration.
8789             if (CI != Data.end()) {
8790               CI->ReturnDevicePointer = true;
8791               Found = true;
8792               break;
8793             }
8794           }
8795           if (Found)
8796             continue;
8797         }
8798 
8799         // We didn't find any match in our map information - generate a zero
8800         // size array section - if the pointer is a struct member we defer this
8801         // action until the whole struct has been processed.
8802         if (isa<MemberExpr>(IE)) {
8803           // Insert the pointer into Info to be processed by
8804           // generateInfoForComponentList. Because it is a member pointer
8805           // without a pointee, no entry will be generated for it, therefore
8806           // we need to generate one after the whole struct has been processed.
8807           // Nonetheless, generateInfoForComponentList must be called to take
8808           // the pointer into account for the calculation of the range of the
8809           // partial struct.
8810           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8811                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8812                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8813           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8814         } else {
8815           llvm::Value *Ptr;
8816           if (IE->isGLValue())
8817             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8818           else
8819             Ptr = CGF.EmitScalarExpr(IE);
8820           CombinedInfo.Exprs.push_back(VD);
8821           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8822           CombinedInfo.Pointers.push_back(Ptr);
8823           CombinedInfo.Sizes.push_back(
8824               llvm::Constant::getNullValue(CGF.Int64Ty));
8825           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8826           CombinedInfo.Mappers.push_back(nullptr);
8827         }
8828       }
8829     }
8830 
8831     for (const auto &Data : Info) {
8832       StructRangeInfoTy PartialStruct;
8833       // Temporary generated information.
8834       MapCombinedInfoTy CurInfo;
8835       const Decl *D = Data.first;
8836       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8837       for (const auto &M : Data.second) {
8838         for (const MapInfo &L : M) {
8839           assert(!L.Components.empty() &&
8840                  "Not expecting declaration with no component lists.");
8841 
8842           // Remember the current base pointer index.
8843           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8844           CurInfo.NonContigInfo.IsNonContiguous =
8845               L.Components.back().isNonContiguous();
8846           generateInfoForComponentList(
8847               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8848               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8849               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8850 
8851           // If this entry relates with a device pointer, set the relevant
8852           // declaration and add the 'return pointer' flag.
8853           if (L.ReturnDevicePointer) {
8854             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8855                    "Unexpected number of mapped base pointers.");
8856 
8857             const ValueDecl *RelevantVD =
8858                 L.Components.back().getAssociatedDeclaration();
8859             assert(RelevantVD &&
8860                    "No relevant declaration related with device pointer??");
8861 
8862             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8863                 RelevantVD);
8864             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8865           }
8866         }
8867       }
8868 
8869       // Append any pending zero-length pointers which are struct members and
8870       // used with use_device_ptr or use_device_addr.
8871       auto CI = DeferredInfo.find(Data.first);
8872       if (CI != DeferredInfo.end()) {
8873         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8874           llvm::Value *BasePtr;
8875           llvm::Value *Ptr;
8876           if (L.ForDeviceAddr) {
8877             if (L.IE->isGLValue())
8878               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8879             else
8880               Ptr = this->CGF.EmitScalarExpr(L.IE);
8881             BasePtr = Ptr;
8882             // Entry is RETURN_PARAM. Also, set the placeholder value
8883             // MEMBER_OF=FFFF so that the entry is later updated with the
8884             // correct value of MEMBER_OF.
8885             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8886           } else {
8887             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8888             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8889                                              L.IE->getExprLoc());
8890             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8891             // placeholder value MEMBER_OF=FFFF so that the entry is later
8892             // updated with the correct value of MEMBER_OF.
8893             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8894                                     OMP_MAP_MEMBER_OF);
8895           }
8896           CurInfo.Exprs.push_back(L.VD);
8897           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8898           CurInfo.Pointers.push_back(Ptr);
8899           CurInfo.Sizes.push_back(
8900               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8901           CurInfo.Mappers.push_back(nullptr);
8902         }
8903       }
8904       // If there is an entry in PartialStruct it means we have a struct with
8905       // individual members mapped. Emit an extra combined entry.
8906       if (PartialStruct.Base.isValid()) {
8907         CurInfo.NonContigInfo.Dims.push_back(0);
8908         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8909       }
8910 
8911       // We need to append the results of this capture to what we already
8912       // have.
8913       CombinedInfo.append(CurInfo);
8914     }
8915     // Append data for use_device_ptr clauses.
8916     CombinedInfo.append(UseDevicePtrCombinedInfo);
8917   }
8918 
8919 public:
8920   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8921       : CurDir(&Dir), CGF(CGF) {
8922     // Extract firstprivate clause information.
8923     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8924       for (const auto *D : C->varlists())
8925         FirstPrivateDecls.try_emplace(
8926             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8927     // Extract implicit firstprivates from uses_allocators clauses.
8928     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8929       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8930         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8931         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8932           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8933                                         /*Implicit=*/true);
8934         else if (const auto *VD = dyn_cast<VarDecl>(
8935                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8936                          ->getDecl()))
8937           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8938       }
8939     }
8940     // Extract device pointer clause information.
8941     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8942       for (auto L : C->component_lists())
8943         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8944     // Extract map information.
8945     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8946       if (C->getMapType() != OMPC_MAP_to)
8947         continue;
8948       for (auto L : C->component_lists()) {
8949         const ValueDecl *VD = std::get<0>(L);
8950         const auto *RD = VD ? VD->getType()
8951                                   .getCanonicalType()
8952                                   .getNonReferenceType()
8953                                   ->getAsCXXRecordDecl()
8954                             : nullptr;
8955         if (RD && RD->isLambda())
8956           LambdasMap.try_emplace(std::get<0>(L), C);
8957       }
8958     }
8959   }
8960 
8961   /// Constructor for the declare mapper directive.
8962   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8963       : CurDir(&Dir), CGF(CGF) {}
8964 
8965   /// Generate code for the combined entry if we have a partially mapped struct
8966   /// and take care of the mapping flags of the arguments corresponding to
8967   /// individual struct members.
8968   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8969                          MapFlagsArrayTy &CurTypes,
8970                          const StructRangeInfoTy &PartialStruct,
8971                          const ValueDecl *VD = nullptr,
8972                          bool NotTargetParams = true) const {
8973     if (CurTypes.size() == 1 &&
8974         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8975         !PartialStruct.IsArraySection)
8976       return;
8977     Address LBAddr = PartialStruct.LowestElem.second;
8978     Address HBAddr = PartialStruct.HighestElem.second;
8979     if (PartialStruct.HasCompleteRecord) {
8980       LBAddr = PartialStruct.LB;
8981       HBAddr = PartialStruct.LB;
8982     }
8983     CombinedInfo.Exprs.push_back(VD);
8984     // Base is the base of the struct
8985     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8986     // Pointer is the address of the lowest element
8987     llvm::Value *LB = LBAddr.getPointer();
8988     CombinedInfo.Pointers.push_back(LB);
8989     // There should not be a mapper for a combined entry.
8990     CombinedInfo.Mappers.push_back(nullptr);
8991     // Size is (addr of {highest+1} element) - (addr of lowest element)
8992     llvm::Value *HB = HBAddr.getPointer();
8993     llvm::Value *HAddr =
8994         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8995     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8996     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8997     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8998     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8999                                                   /*isSigned=*/false);
9000     CombinedInfo.Sizes.push_back(Size);
9001     // Map type is always TARGET_PARAM, if generate info for captures.
9002     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
9003                                                  : OMP_MAP_TARGET_PARAM);
9004     // If any element has the present modifier, then make sure the runtime
9005     // doesn't attempt to allocate the struct.
9006     if (CurTypes.end() !=
9007         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9008           return Type & OMP_MAP_PRESENT;
9009         }))
9010       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
9011     // Remove TARGET_PARAM flag from the first element
9012     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
9013     // If any element has the ompx_hold modifier, then make sure the runtime
9014     // uses the hold reference count for the struct as a whole so that it won't
9015     // be unmapped by an extra dynamic reference count decrement.  Add it to all
9016     // elements as well so the runtime knows which reference count to check
9017     // when determining whether it's time for device-to-host transfers of
9018     // individual elements.
9019     if (CurTypes.end() !=
9020         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9021           return Type & OMP_MAP_OMPX_HOLD;
9022         })) {
9023       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
9024       for (auto &M : CurTypes)
9025         M |= OMP_MAP_OMPX_HOLD;
9026     }
9027 
9028     // All other current entries will be MEMBER_OF the combined entry
9029     // (except for PTR_AND_OBJ entries which do not have a placeholder value
9030     // 0xFFFF in the MEMBER_OF field).
9031     OpenMPOffloadMappingFlags MemberOfFlag =
9032         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9033     for (auto &M : CurTypes)
9034       setCorrectMemberOfFlag(M, MemberOfFlag);
9035   }
9036 
9037   /// Generate all the base pointers, section pointers, sizes, map types, and
9038   /// mappers for the extracted mappable expressions (all included in \a
9039   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9040   /// pair of the relevant declaration and index where it occurs is appended to
9041   /// the device pointers info array.
9042   void generateAllInfo(
9043       MapCombinedInfoTy &CombinedInfo,
9044       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9045           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9046     assert(CurDir.is<const OMPExecutableDirective *>() &&
9047            "Expect a executable directive");
9048     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9049     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9050   }
9051 
9052   /// Generate all the base pointers, section pointers, sizes, map types, and
9053   /// mappers for the extracted map clauses of user-defined mapper (all included
9054   /// in \a CombinedInfo).
9055   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9056     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9057            "Expect a declare mapper directive");
9058     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9059     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9060   }
9061 
9062   /// Emit capture info for lambdas for variables captured by reference.
9063   void generateInfoForLambdaCaptures(
9064       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9065       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9066     const auto *RD = VD->getType()
9067                          .getCanonicalType()
9068                          .getNonReferenceType()
9069                          ->getAsCXXRecordDecl();
9070     if (!RD || !RD->isLambda())
9071       return;
9072     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
9073     LValue VDLVal = CGF.MakeAddrLValue(
9074         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9075     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9076     FieldDecl *ThisCapture = nullptr;
9077     RD->getCaptureFields(Captures, ThisCapture);
9078     if (ThisCapture) {
9079       LValue ThisLVal =
9080           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9081       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9082       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9083                                  VDLVal.getPointer(CGF));
9084       CombinedInfo.Exprs.push_back(VD);
9085       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9086       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9087       CombinedInfo.Sizes.push_back(
9088           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9089                                     CGF.Int64Ty, /*isSigned=*/true));
9090       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9091                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9092       CombinedInfo.Mappers.push_back(nullptr);
9093     }
9094     for (const LambdaCapture &LC : RD->captures()) {
9095       if (!LC.capturesVariable())
9096         continue;
9097       const VarDecl *VD = LC.getCapturedVar();
9098       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9099         continue;
9100       auto It = Captures.find(VD);
9101       assert(It != Captures.end() && "Found lambda capture without field.");
9102       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9103       if (LC.getCaptureKind() == LCK_ByRef) {
9104         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9105         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9106                                    VDLVal.getPointer(CGF));
9107         CombinedInfo.Exprs.push_back(VD);
9108         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9109         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9110         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9111             CGF.getTypeSize(
9112                 VD->getType().getCanonicalType().getNonReferenceType()),
9113             CGF.Int64Ty, /*isSigned=*/true));
9114       } else {
9115         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9116         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9117                                    VDLVal.getPointer(CGF));
9118         CombinedInfo.Exprs.push_back(VD);
9119         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9120         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9121         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9122       }
9123       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9124                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9125       CombinedInfo.Mappers.push_back(nullptr);
9126     }
9127   }
9128 
9129   /// Set correct indices for lambdas captures.
9130   void adjustMemberOfForLambdaCaptures(
9131       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9132       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9133       MapFlagsArrayTy &Types) const {
9134     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9135       // Set correct member_of idx for all implicit lambda captures.
9136       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9137                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9138         continue;
9139       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9140       assert(BasePtr && "Unable to find base lambda address.");
9141       int TgtIdx = -1;
9142       for (unsigned J = I; J > 0; --J) {
9143         unsigned Idx = J - 1;
9144         if (Pointers[Idx] != BasePtr)
9145           continue;
9146         TgtIdx = Idx;
9147         break;
9148       }
9149       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9150       // All other current entries will be MEMBER_OF the combined entry
9151       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9152       // 0xFFFF in the MEMBER_OF field).
9153       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9154       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9155     }
9156   }
9157 
9158   /// Generate the base pointers, section pointers, sizes, map types, and
9159   /// mappers associated to a given capture (all included in \a CombinedInfo).
9160   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9161                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9162                               StructRangeInfoTy &PartialStruct) const {
9163     assert(!Cap->capturesVariableArrayType() &&
9164            "Not expecting to generate map info for a variable array type!");
9165 
    // We need to know when we are generating information for the first
    // component.
9167     const ValueDecl *VD = Cap->capturesThis()
9168                               ? nullptr
9169                               : Cap->getCapturedVar()->getCanonicalDecl();
9170 
9171     // for map(to: lambda): skip here, processing it in
9172     // generateDefaultMapInfo
9173     if (LambdasMap.count(VD))
9174       return;
9175 
9176     // If this declaration appears in a is_device_ptr clause we just have to
9177     // pass the pointer by value. If it is a reference to a declaration, we just
9178     // pass its value.
9179     if (DevPointersMap.count(VD)) {
9180       CombinedInfo.Exprs.push_back(VD);
9181       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9182       CombinedInfo.Pointers.push_back(Arg);
9183       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9184           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9185           /*isSigned=*/true));
9186       CombinedInfo.Types.push_back(
9187           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9188           OMP_MAP_TARGET_PARAM);
9189       CombinedInfo.Mappers.push_back(nullptr);
9190       return;
9191     }
9192 
9193     using MapData =
9194         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9195                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9196                    const ValueDecl *, const Expr *>;
9197     SmallVector<MapData, 4> DeclComponentLists;
9198     assert(CurDir.is<const OMPExecutableDirective *>() &&
9199            "Expect a executable directive");
9200     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9201     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9202       const auto *EI = C->getVarRefs().begin();
9203       for (const auto L : C->decl_component_lists(VD)) {
9204         const ValueDecl *VDecl, *Mapper;
9205         // The Expression is not correct if the mapping is implicit
9206         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9207         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9208         std::tie(VDecl, Components, Mapper) = L;
9209         assert(VDecl == VD && "We got information for the wrong declaration??");
9210         assert(!Components.empty() &&
9211                "Not expecting declaration with no component lists.");
9212         DeclComponentLists.emplace_back(Components, C->getMapType(),
9213                                         C->getMapTypeModifiers(),
9214                                         C->isImplicit(), Mapper, E);
9215         ++EI;
9216       }
9217     }
9218     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9219                                              const MapData &RHS) {
9220       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9221       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9222       bool HasPresent =
9223           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9224       bool HasAllocs = MapType == OMPC_MAP_alloc;
9225       MapModifiers = std::get<2>(RHS);
9226       MapType = std::get<1>(LHS);
9227       bool HasPresentR =
9228           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9229       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9230       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9231     });
9232 
9233     // Find overlapping elements (including the offset from the base element).
9234     llvm::SmallDenseMap<
9235         const MapData *,
9236         llvm::SmallVector<
9237             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9238         4>
9239         OverlappedData;
9240     size_t Count = 0;
9241     for (const MapData &L : DeclComponentLists) {
9242       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9243       OpenMPMapClauseKind MapType;
9244       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9245       bool IsImplicit;
9246       const ValueDecl *Mapper;
9247       const Expr *VarRef;
9248       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9249           L;
9250       ++Count;
9251       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9252         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9253         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9254                  VarRef) = L1;
9255         auto CI = Components.rbegin();
9256         auto CE = Components.rend();
9257         auto SI = Components1.rbegin();
9258         auto SE = Components1.rend();
9259         for (; CI != CE && SI != SE; ++CI, ++SI) {
9260           if (CI->getAssociatedExpression()->getStmtClass() !=
9261               SI->getAssociatedExpression()->getStmtClass())
9262             break;
9263           // Are we dealing with different variables/fields?
9264           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9265             break;
9266         }
9267         // Found overlapping if, at least for one component, reached the head
9268         // of the components list.
9269         if (CI == CE || SI == SE) {
9270           // Ignore it if it is the same component.
9271           if (CI == CE && SI == SE)
9272             continue;
9273           const auto It = (SI == SE) ? CI : SI;
9274           // If one component is a pointer and another one is a kind of
9275           // dereference of this pointer (array subscript, section, dereference,
9276           // etc.), it is not an overlapping.
9277           // Same, if one component is a base and another component is a
9278           // dereferenced pointer memberexpr with the same base.
9279           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9280               (std::prev(It)->getAssociatedDeclaration() &&
9281                std::prev(It)
9282                    ->getAssociatedDeclaration()
9283                    ->getType()
9284                    ->isPointerType()) ||
9285               (It->getAssociatedDeclaration() &&
9286                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9287                std::next(It) != CE && std::next(It) != SE))
9288             continue;
9289           const MapData &BaseData = CI == CE ? L : L1;
9290           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9291               SI == SE ? Components : Components1;
9292           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9293           OverlappedElements.getSecond().push_back(SubData);
9294         }
9295       }
9296     }
9297     // Sort the overlapped elements for each item.
9298     llvm::SmallVector<const FieldDecl *, 4> Layout;
9299     if (!OverlappedData.empty()) {
9300       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9301       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9302       while (BaseType != OrigType) {
9303         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9304         OrigType = BaseType->getPointeeOrArrayElementType();
9305       }
9306 
9307       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9308         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9309       else {
9310         const auto *RD = BaseType->getAsRecordDecl();
9311         Layout.append(RD->field_begin(), RD->field_end());
9312       }
9313     }
9314     for (auto &Pair : OverlappedData) {
9315       llvm::stable_sort(
9316           Pair.getSecond(),
9317           [&Layout](
9318               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9319               OMPClauseMappableExprCommon::MappableExprComponentListRef
9320                   Second) {
9321             auto CI = First.rbegin();
9322             auto CE = First.rend();
9323             auto SI = Second.rbegin();
9324             auto SE = Second.rend();
9325             for (; CI != CE && SI != SE; ++CI, ++SI) {
9326               if (CI->getAssociatedExpression()->getStmtClass() !=
9327                   SI->getAssociatedExpression()->getStmtClass())
9328                 break;
9329               // Are we dealing with different variables/fields?
9330               if (CI->getAssociatedDeclaration() !=
9331                   SI->getAssociatedDeclaration())
9332                 break;
9333             }
9334 
9335             // Lists contain the same elements.
9336             if (CI == CE && SI == SE)
9337               return false;
9338 
9339             // List with less elements is less than list with more elements.
9340             if (CI == CE || SI == SE)
9341               return CI == CE;
9342 
9343             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9344             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9345             if (FD1->getParent() == FD2->getParent())
9346               return FD1->getFieldIndex() < FD2->getFieldIndex();
9347             const auto *It =
9348                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9349                   return FD == FD1 || FD == FD2;
9350                 });
9351             return *It == FD1;
9352           });
9353     }
9354 
9355     // Associated with a capture, because the mapping flags depend on it.
9356     // Go through all of the elements with the overlapped elements.
9357     bool IsFirstComponentList = true;
9358     for (const auto &Pair : OverlappedData) {
9359       const MapData &L = *Pair.getFirst();
9360       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9361       OpenMPMapClauseKind MapType;
9362       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9363       bool IsImplicit;
9364       const ValueDecl *Mapper;
9365       const Expr *VarRef;
9366       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9367           L;
9368       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9369           OverlappedComponents = Pair.getSecond();
9370       generateInfoForComponentList(
9371           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9372           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9373           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9374       IsFirstComponentList = false;
9375     }
9376     // Go through other elements without overlapped elements.
9377     for (const MapData &L : DeclComponentLists) {
9378       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9379       OpenMPMapClauseKind MapType;
9380       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9381       bool IsImplicit;
9382       const ValueDecl *Mapper;
9383       const Expr *VarRef;
9384       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9385           L;
9386       auto It = OverlappedData.find(&L);
9387       if (It == OverlappedData.end())
9388         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9389                                      Components, CombinedInfo, PartialStruct,
9390                                      IsFirstComponentList, IsImplicit, Mapper,
9391                                      /*ForDeviceAddr=*/false, VD, VarRef);
9392       IsFirstComponentList = false;
9393     }
9394   }
9395 
9396   /// Generate the default map information for a given capture \a CI,
9397   /// record field declaration \a RI and captured value \a CV.
9398   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9399                               const FieldDecl &RI, llvm::Value *CV,
9400                               MapCombinedInfoTy &CombinedInfo) const {
9401     bool IsImplicit = true;
9402     // Do the default mapping.
9403     if (CI.capturesThis()) {
9404       CombinedInfo.Exprs.push_back(nullptr);
9405       CombinedInfo.BasePointers.push_back(CV);
9406       CombinedInfo.Pointers.push_back(CV);
9407       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9408       CombinedInfo.Sizes.push_back(
9409           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9410                                     CGF.Int64Ty, /*isSigned=*/true));
9411       // Default map type.
9412       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9413     } else if (CI.capturesVariableByCopy()) {
9414       const VarDecl *VD = CI.getCapturedVar();
9415       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9416       CombinedInfo.BasePointers.push_back(CV);
9417       CombinedInfo.Pointers.push_back(CV);
9418       if (!RI.getType()->isAnyPointerType()) {
9419         // We have to signal to the runtime captures passed by value that are
9420         // not pointers.
9421         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9422         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9423             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9424       } else {
9425         // Pointers are implicitly mapped with a zero size and no flags
9426         // (other than first map that is added for all implicit maps).
9427         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9428         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9429       }
9430       auto I = FirstPrivateDecls.find(VD);
9431       if (I != FirstPrivateDecls.end())
9432         IsImplicit = I->getSecond();
9433     } else {
9434       assert(CI.capturesVariable() && "Expected captured reference.");
9435       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9436       QualType ElementType = PtrTy->getPointeeType();
9437       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9438           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9439       // The default map type for a scalar/complex type is 'to' because by
9440       // default the value doesn't have to be retrieved. For an aggregate
9441       // type, the default is 'tofrom'.
9442       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9443       const VarDecl *VD = CI.getCapturedVar();
9444       auto I = FirstPrivateDecls.find(VD);
9445       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9446       CombinedInfo.BasePointers.push_back(CV);
9447       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9448         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9449             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9450             AlignmentSource::Decl));
9451         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9452       } else {
9453         CombinedInfo.Pointers.push_back(CV);
9454       }
9455       if (I != FirstPrivateDecls.end())
9456         IsImplicit = I->getSecond();
9457     }
9458     // Every default map produces a single argument which is a target parameter.
9459     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9460 
9461     // Add flag stating this is an implicit map.
9462     if (IsImplicit)
9463       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9464 
9465     // No user-defined mapper for default mapping.
9466     CombinedInfo.Mappers.push_back(nullptr);
9467   }
9468 };
9469 } // anonymous namespace
9470 
9471 static void emitNonContiguousDescriptor(
9472     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9473     CGOpenMPRuntime::TargetDataInfo &Info) {
9474   CodeGenModule &CGM = CGF.CGM;
9475   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9476       &NonContigInfo = CombinedInfo.NonContigInfo;
9477 
9478   // Build an array of struct descriptor_dim and then assign it to
9479   // offload_args.
9480   //
9481   // struct descriptor_dim {
9482   //  uint64_t offset;
9483   //  uint64_t count;
9484   //  uint64_t stride
9485   // };
9486   ASTContext &C = CGF.getContext();
9487   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9488   RecordDecl *RD;
9489   RD = C.buildImplicitRecord("descriptor_dim");
9490   RD->startDefinition();
9491   addFieldToRecordDecl(C, RD, Int64Ty);
9492   addFieldToRecordDecl(C, RD, Int64Ty);
9493   addFieldToRecordDecl(C, RD, Int64Ty);
9494   RD->completeDefinition();
9495   QualType DimTy = C.getRecordType(RD);
9496 
9497   enum { OffsetFD = 0, CountFD, StrideFD };
9498   // We need two index variable here since the size of "Dims" is the same as the
9499   // size of Components, however, the size of offset, count, and stride is equal
9500   // to the size of base declaration that is non-contiguous.
9501   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9502     // Skip emitting ir if dimension size is 1 since it cannot be
9503     // non-contiguous.
9504     if (NonContigInfo.Dims[I] == 1)
9505       continue;
9506     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9507     QualType ArrayTy =
9508         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9509     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9510     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9511       unsigned RevIdx = EE - II - 1;
9512       LValue DimsLVal = CGF.MakeAddrLValue(
9513           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9514       // Offset
9515       LValue OffsetLVal = CGF.EmitLValueForField(
9516           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9517       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9518       // Count
9519       LValue CountLVal = CGF.EmitLValueForField(
9520           DimsLVal, *std::next(RD->field_begin(), CountFD));
9521       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9522       // Stride
9523       LValue StrideLVal = CGF.EmitLValueForField(
9524           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9525       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9526     }
9527     // args[I] = &dims
9528     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9529         DimsAddr, CGM.Int8PtrTy);
9530     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9531         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9532         Info.PointersArray, 0, I);
9533     Address PAddr(P, CGF.getPointerAlign());
9534     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9535     ++L;
9536   }
9537 }
9538 
9539 // Try to extract the base declaration from a `this->x` expression if possible.
9540 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9541   if (!E)
9542     return nullptr;
9543 
9544   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9545     if (const MemberExpr *ME =
9546             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9547       return ME->getMemberDecl();
9548   return nullptr;
9549 }
9550 
9551 /// Emit a string constant containing the names of the values mapped to the
9552 /// offloading runtime library.
9553 llvm::Constant *
9554 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9555                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9556 
9557   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9558     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9559 
9560   SourceLocation Loc;
9561   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9562     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9563       Loc = VD->getLocation();
9564     else
9565       Loc = MapExprs.getMapExpr()->getExprLoc();
9566   } else {
9567     Loc = MapExprs.getMapDecl()->getLocation();
9568   }
9569 
9570   std::string ExprName = "";
9571   if (MapExprs.getMapExpr()) {
9572     PrintingPolicy P(CGF.getContext().getLangOpts());
9573     llvm::raw_string_ostream OS(ExprName);
9574     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9575     OS.flush();
9576   } else {
9577     ExprName = MapExprs.getMapDecl()->getNameAsString();
9578   }
9579 
9580   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9581   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9582                                          PLoc.getLine(), PLoc.getColumn());
9583 }
9584 
9585 /// Emit the arrays used to pass the captures and map information to the
9586 /// offloading runtime library. If there is no map or capture information,
9587 /// return nullptr by reference.
9588 static void emitOffloadingArrays(
9589     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9590     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9591     bool IsNonContiguous = false) {
9592   CodeGenModule &CGM = CGF.CGM;
9593   ASTContext &Ctx = CGF.getContext();
9594 
9595   // Reset the array information.
9596   Info.clearArrayInfo();
9597   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9598 
9599   if (Info.NumberOfPtrs) {
9600     // Detect if we have any capture size requiring runtime evaluation of the
9601     // size so that a constant array could be eventually used.
9602     bool hasRuntimeEvaluationCaptureSize = false;
9603     for (llvm::Value *S : CombinedInfo.Sizes)
9604       if (!isa<llvm::Constant>(S)) {
9605         hasRuntimeEvaluationCaptureSize = true;
9606         break;
9607       }
9608 
9609     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9610     QualType PointerArrayType = Ctx.getConstantArrayType(
9611         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9612         /*IndexTypeQuals=*/0);
9613 
9614     Info.BasePointersArray =
9615         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9616     Info.PointersArray =
9617         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9618     Address MappersArray =
9619         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9620     Info.MappersArray = MappersArray.getPointer();
9621 
9622     // If we don't have any VLA types or other types that require runtime
9623     // evaluation, we can use a constant array for the map sizes, otherwise we
9624     // need to fill up the arrays as we do for the pointers.
9625     QualType Int64Ty =
9626         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9627     if (hasRuntimeEvaluationCaptureSize) {
9628       QualType SizeArrayType = Ctx.getConstantArrayType(
9629           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9630           /*IndexTypeQuals=*/0);
9631       Info.SizesArray =
9632           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9633     } else {
9634       // We expect all the sizes to be constant, so we collect them to create
9635       // a constant array.
9636       SmallVector<llvm::Constant *, 16> ConstSizes;
9637       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9638         if (IsNonContiguous &&
9639             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9640           ConstSizes.push_back(llvm::ConstantInt::get(
9641               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9642         } else {
9643           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9644         }
9645       }
9646 
9647       auto *SizesArrayInit = llvm::ConstantArray::get(
9648           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9649       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9650       auto *SizesArrayGbl = new llvm::GlobalVariable(
9651           CGM.getModule(), SizesArrayInit->getType(),
9652           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9653           SizesArrayInit, Name);
9654       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9655       Info.SizesArray = SizesArrayGbl;
9656     }
9657 
9658     // The map types are always constant so we don't need to generate code to
9659     // fill arrays. Instead, we create an array constant.
9660     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9661     llvm::copy(CombinedInfo.Types, Mapping.begin());
9662     std::string MaptypesName =
9663         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9664     auto *MapTypesArrayGbl =
9665         OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9666     Info.MapTypesArray = MapTypesArrayGbl;
9667 
9668     // The information types are only built if there is debug information
9669     // requested.
9670     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9671       Info.MapNamesArray = llvm::Constant::getNullValue(
9672           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9673     } else {
9674       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9675         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9676       };
9677       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9678       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9679       std::string MapnamesName =
9680           CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9681       auto *MapNamesArrayGbl =
9682           OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9683       Info.MapNamesArray = MapNamesArrayGbl;
9684     }
9685 
9686     // If there's a present map type modifier, it must not be applied to the end
9687     // of a region, so generate a separate map type array in that case.
9688     if (Info.separateBeginEndCalls()) {
9689       bool EndMapTypesDiffer = false;
9690       for (uint64_t &Type : Mapping) {
9691         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9692           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9693           EndMapTypesDiffer = true;
9694         }
9695       }
9696       if (EndMapTypesDiffer) {
9697         MapTypesArrayGbl =
9698             OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9699         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9700       }
9701     }
9702 
9703     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9704       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9705       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9706           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9707           Info.BasePointersArray, 0, I);
9708       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9709           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9710       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9711       CGF.Builder.CreateStore(BPVal, BPAddr);
9712 
9713       if (Info.requiresDevicePointerInfo())
9714         if (const ValueDecl *DevVD =
9715                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9716           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9717 
9718       llvm::Value *PVal = CombinedInfo.Pointers[I];
9719       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9720           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9721           Info.PointersArray, 0, I);
9722       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9723           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9724       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9725       CGF.Builder.CreateStore(PVal, PAddr);
9726 
9727       if (hasRuntimeEvaluationCaptureSize) {
9728         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9729             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9730             Info.SizesArray,
9731             /*Idx0=*/0,
9732             /*Idx1=*/I);
9733         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9734         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9735                                                           CGM.Int64Ty,
9736                                                           /*isSigned=*/true),
9737                                 SAddr);
9738       }
9739 
9740       // Fill up the mapper array.
9741       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9742       if (CombinedInfo.Mappers[I]) {
9743         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9744             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9745         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9746         Info.HasMapper = true;
9747       }
9748       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9749       CGF.Builder.CreateStore(MFunc, MAddr);
9750     }
9751   }
9752 
9753   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9754       Info.NumberOfPtrs == 0)
9755     return;
9756 
9757   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9758 }
9759 
namespace {
/// Optional settings accepted by the emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// Requests the end-of-region variant of the arguments.
  bool ForEndCall;

  /// Deliberately implicit so a bare flag or a braced list converts to the
  /// options object; with no argument the flag is false.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9768 
9769 /// Emit the arguments to be passed to the runtime library based on the
9770 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9771 /// ForEndCall, emit map types to be passed for the end of the region instead of
9772 /// the beginning.
9773 static void emitOffloadingArraysArgument(
9774     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9775     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9776     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9777     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9778     const ArgumentsOptions &Options = ArgumentsOptions()) {
9779   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9780          "expected region end call to runtime only when end call is separate");
9781   CodeGenModule &CGM = CGF.CGM;
9782   if (Info.NumberOfPtrs) {
9783     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9784         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9785         Info.BasePointersArray,
9786         /*Idx0=*/0, /*Idx1=*/0);
9787     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9788         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9789         Info.PointersArray,
9790         /*Idx0=*/0,
9791         /*Idx1=*/0);
9792     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9793         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9794         /*Idx0=*/0, /*Idx1=*/0);
9795     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9796         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9797         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9798                                                     : Info.MapTypesArray,
9799         /*Idx0=*/0,
9800         /*Idx1=*/0);
9801 
9802     // Only emit the mapper information arrays if debug information is
9803     // requested.
9804     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9805       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9806     else
9807       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9808           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9809           Info.MapNamesArray,
9810           /*Idx0=*/0,
9811           /*Idx1=*/0);
9812     // If there is no user-defined mapper, set the mapper array to nullptr to
9813     // avoid an unnecessary data privatization
9814     if (!Info.HasMapper)
9815       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9816     else
9817       MappersArrayArg =
9818           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9819   } else {
9820     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9821     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9822     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9823     MapTypesArrayArg =
9824         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9825     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9826     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9827   }
9828 }
9829 
9830 /// Check for inner distribute directive.
9831 static const OMPExecutableDirective *
9832 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9833   const auto *CS = D.getInnermostCapturedStmt();
9834   const auto *Body =
9835       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9836   const Stmt *ChildStmt =
9837       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9838 
9839   if (const auto *NestedDir =
9840           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9841     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9842     switch (D.getDirectiveKind()) {
9843     case OMPD_target:
9844       if (isOpenMPDistributeDirective(DKind))
9845         return NestedDir;
9846       if (DKind == OMPD_teams) {
9847         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9848             /*IgnoreCaptured=*/true);
9849         if (!Body)
9850           return nullptr;
9851         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9852         if (const auto *NND =
9853                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9854           DKind = NND->getDirectiveKind();
9855           if (isOpenMPDistributeDirective(DKind))
9856             return NND;
9857         }
9858       }
9859       return nullptr;
9860     case OMPD_target_teams:
9861       if (isOpenMPDistributeDirective(DKind))
9862         return NestedDir;
9863       return nullptr;
9864     case OMPD_target_parallel:
9865     case OMPD_target_simd:
9866     case OMPD_target_parallel_for:
9867     case OMPD_target_parallel_for_simd:
9868       return nullptr;
9869     case OMPD_target_teams_distribute:
9870     case OMPD_target_teams_distribute_simd:
9871     case OMPD_target_teams_distribute_parallel_for:
9872     case OMPD_target_teams_distribute_parallel_for_simd:
9873     case OMPD_parallel:
9874     case OMPD_for:
9875     case OMPD_parallel_for:
9876     case OMPD_parallel_master:
9877     case OMPD_parallel_sections:
9878     case OMPD_for_simd:
9879     case OMPD_parallel_for_simd:
9880     case OMPD_cancel:
9881     case OMPD_cancellation_point:
9882     case OMPD_ordered:
9883     case OMPD_threadprivate:
9884     case OMPD_allocate:
9885     case OMPD_task:
9886     case OMPD_simd:
9887     case OMPD_tile:
9888     case OMPD_unroll:
9889     case OMPD_sections:
9890     case OMPD_section:
9891     case OMPD_single:
9892     case OMPD_master:
9893     case OMPD_critical:
9894     case OMPD_taskyield:
9895     case OMPD_barrier:
9896     case OMPD_taskwait:
9897     case OMPD_taskgroup:
9898     case OMPD_atomic:
9899     case OMPD_flush:
9900     case OMPD_depobj:
9901     case OMPD_scan:
9902     case OMPD_teams:
9903     case OMPD_target_data:
9904     case OMPD_target_exit_data:
9905     case OMPD_target_enter_data:
9906     case OMPD_distribute:
9907     case OMPD_distribute_simd:
9908     case OMPD_distribute_parallel_for:
9909     case OMPD_distribute_parallel_for_simd:
9910     case OMPD_teams_distribute:
9911     case OMPD_teams_distribute_simd:
9912     case OMPD_teams_distribute_parallel_for:
9913     case OMPD_teams_distribute_parallel_for_simd:
9914     case OMPD_target_update:
9915     case OMPD_declare_simd:
9916     case OMPD_declare_variant:
9917     case OMPD_begin_declare_variant:
9918     case OMPD_end_declare_variant:
9919     case OMPD_declare_target:
9920     case OMPD_end_declare_target:
9921     case OMPD_declare_reduction:
9922     case OMPD_declare_mapper:
9923     case OMPD_taskloop:
9924     case OMPD_taskloop_simd:
9925     case OMPD_master_taskloop:
9926     case OMPD_master_taskloop_simd:
9927     case OMPD_parallel_master_taskloop:
9928     case OMPD_parallel_master_taskloop_simd:
9929     case OMPD_requires:
9930     case OMPD_metadirective:
9931     case OMPD_unknown:
9932     default:
9933       llvm_unreachable("Unexpected directive.");
9934     }
9935   }
9936 
9937   return nullptr;
9938 }
9939 
9940 /// Emit the user-defined mapper function. The code generation follows the
9941 /// pattern in the example below.
9942 /// \code
9943 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9944 ///                                           void *base, void *begin,
9945 ///                                           int64_t size, int64_t type,
9946 ///                                           void *name = nullptr) {
9947 ///   // Allocate space for an array section first or add a base/begin for
9948 ///   // pointer dereference.
9949 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9950 ///       !maptype.IsDelete)
9951 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9952 ///                                 size*sizeof(Ty), clearToFromMember(type));
9953 ///   // Map members.
9954 ///   for (unsigned i = 0; i < size; i++) {
9955 ///     // For each component specified by this mapper:
9956 ///     for (auto c : begin[i]->all_components) {
9957 ///       if (c.hasMapper())
9958 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9959 ///                       c.arg_type, c.arg_name);
9960 ///       else
9961 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9962 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9963 ///                                     c.arg_name);
9964 ///     }
9965 ///   }
9966 ///   // Delete the array section.
9967 ///   if (size > 1 && maptype.IsDelete)
9968 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9969 ///                                 size*sizeof(Ty), clearToFromMember(type));
9970 /// }
9971 /// \endcode
9972 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9973                                             CodeGenFunction *CGF) {
9974   if (UDMMap.count(D) > 0)
9975     return;
9976   ASTContext &C = CGM.getContext();
9977   QualType Ty = D->getType();
9978   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9979   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9980   auto *MapperVarDecl =
9981       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9982   SourceLocation Loc = D->getLocation();
9983   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9984 
9985   // Prepare mapper function arguments and attributes.
9986   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9987                               C.VoidPtrTy, ImplicitParamDecl::Other);
9988   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9989                             ImplicitParamDecl::Other);
9990   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9991                              C.VoidPtrTy, ImplicitParamDecl::Other);
9992   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9993                             ImplicitParamDecl::Other);
9994   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9995                             ImplicitParamDecl::Other);
9996   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9997                             ImplicitParamDecl::Other);
9998   FunctionArgList Args;
9999   Args.push_back(&HandleArg);
10000   Args.push_back(&BaseArg);
10001   Args.push_back(&BeginArg);
10002   Args.push_back(&SizeArg);
10003   Args.push_back(&TypeArg);
10004   Args.push_back(&NameArg);
10005   const CGFunctionInfo &FnInfo =
10006       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
10007   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
10008   SmallString<64> TyStr;
10009   llvm::raw_svector_ostream Out(TyStr);
10010   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
10011   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
10012   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
10013                                     Name, &CGM.getModule());
10014   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
10015   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
10016   // Start the mapper function code generation.
10017   CodeGenFunction MapperCGF(CGM);
10018   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
10019   // Compute the starting and end addresses of array elements.
10020   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
10021       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
10022       C.getPointerType(Int64Ty), Loc);
10023   // Prepare common arguments for array initiation and deletion.
10024   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
10025       MapperCGF.GetAddrOfLocalVar(&HandleArg),
10026       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10027   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
10028       MapperCGF.GetAddrOfLocalVar(&BaseArg),
10029       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10030   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
10031       MapperCGF.GetAddrOfLocalVar(&BeginArg),
10032       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10033   // Convert the size in bytes into the number of array elements.
10034   Size = MapperCGF.Builder.CreateExactUDiv(
10035       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10036   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
10037       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
10038   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
10039       PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
10040   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
10041       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
10042       C.getPointerType(Int64Ty), Loc);
10043   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
10044       MapperCGF.GetAddrOfLocalVar(&NameArg),
10045       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10046 
10047   // Emit array initiation if this is an array section and \p MapType indicates
10048   // that memory allocation is required.
10049   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
10050   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10051                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
10052 
10053   // Emit a for loop to iterate through SizeArg of elements and map all of them.
10054 
10055   // Emit the loop header block.
10056   MapperCGF.EmitBlock(HeadBB);
10057   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
10058   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
10059   // Evaluate whether the initial condition is satisfied.
10060   llvm::Value *IsEmpty =
10061       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
10062   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10063   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
10064 
10065   // Emit the loop body block.
10066   MapperCGF.EmitBlock(BodyBB);
10067   llvm::BasicBlock *LastBB = BodyBB;
10068   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
10069       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
10070   PtrPHI->addIncoming(PtrBegin, EntryBB);
10071   Address PtrCurrent =
10072       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
10073                           .getAlignment()
10074                           .alignmentOfArrayElement(ElementSize));
10075   // Privatize the declared variable of mapper to be the current array element.
10076   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10077   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
10078   (void)Scope.Privatize();
10079 
10080   // Get map clause information. Fill up the arrays with all mapped variables.
10081   MappableExprsHandler::MapCombinedInfoTy Info;
10082   MappableExprsHandler MEHandler(*D, MapperCGF);
10083   MEHandler.generateAllInfoForMapper(Info);
10084 
10085   // Call the runtime API __tgt_mapper_num_components to get the number of
10086   // pre-existing components.
10087   llvm::Value *OffloadingArgs[] = {Handle};
10088   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
10089       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10090                                             OMPRTL___tgt_mapper_num_components),
10091       OffloadingArgs);
10092   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
10093       PreviousSize,
10094       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
10095 
10096   // Fill up the runtime mapper handle for all components.
10097   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
10098     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
10099         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10100     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
10101         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10102     llvm::Value *CurSizeArg = Info.Sizes[I];
10103     llvm::Value *CurNameArg =
10104         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
10105             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
10106             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10107 
10108     // Extract the MEMBER_OF field from the map type.
10109     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10110     llvm::Value *MemberMapType =
10111         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10112 
10113     // Combine the map type inherited from user-defined mapper with that
10114     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10115     // bits of the \a MapType, which is the input argument of the mapper
10116     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10117     // bits of MemberMapType.
10118     // [OpenMP 5.0], 1.2.6. map-type decay.
10119     //        | alloc |  to   | from  | tofrom | release | delete
10120     // ----------------------------------------------------------
10121     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10122     // to     | alloc |  to   | alloc |   to   | release | delete
10123     // from   | alloc | alloc | from  |  from  | release | delete
10124     // tofrom | alloc |  to   | from  | tofrom | release | delete
10125     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10126         MapType,
10127         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10128                                    MappableExprsHandler::OMP_MAP_FROM));
10129     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10130     llvm::BasicBlock *AllocElseBB =
10131         MapperCGF.createBasicBlock("omp.type.alloc.else");
10132     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10133     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10134     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10135     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10136     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10137     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10138     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10139     MapperCGF.EmitBlock(AllocBB);
10140     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10141         MemberMapType,
10142         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10143                                      MappableExprsHandler::OMP_MAP_FROM)));
10144     MapperCGF.Builder.CreateBr(EndBB);
10145     MapperCGF.EmitBlock(AllocElseBB);
10146     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10147         LeftToFrom,
10148         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10149     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10150     // In case of to, clear OMP_MAP_FROM.
10151     MapperCGF.EmitBlock(ToBB);
10152     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10153         MemberMapType,
10154         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10155     MapperCGF.Builder.CreateBr(EndBB);
10156     MapperCGF.EmitBlock(ToElseBB);
10157     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10158         LeftToFrom,
10159         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10160     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10161     // In case of from, clear OMP_MAP_TO.
10162     MapperCGF.EmitBlock(FromBB);
10163     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10164         MemberMapType,
10165         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10166     // In case of tofrom, do nothing.
10167     MapperCGF.EmitBlock(EndBB);
10168     LastBB = EndBB;
10169     llvm::PHINode *CurMapType =
10170         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10171     CurMapType->addIncoming(AllocMapType, AllocBB);
10172     CurMapType->addIncoming(ToMapType, ToBB);
10173     CurMapType->addIncoming(FromMapType, FromBB);
10174     CurMapType->addIncoming(MemberMapType, ToElseBB);
10175 
10176     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10177                                      CurSizeArg, CurMapType, CurNameArg};
10178     if (Info.Mappers[I]) {
10179       // Call the corresponding mapper function.
10180       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10181           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10182       assert(MapperFunc && "Expect a valid mapper function is available.");
10183       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10184     } else {
10185       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10186       // data structure.
10187       MapperCGF.EmitRuntimeCall(
10188           OMPBuilder.getOrCreateRuntimeFunction(
10189               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10190           OffloadingArgs);
10191     }
10192   }
10193 
10194   // Update the pointer to point to the next element that needs to be mapped,
10195   // and check whether we have mapped all elements.
10196   llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
10197   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10198       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10199   PtrPHI->addIncoming(PtrNext, LastBB);
10200   llvm::Value *IsDone =
10201       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10202   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10203   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10204 
10205   MapperCGF.EmitBlock(ExitBB);
10206   // Emit array deletion if this is an array section and \p MapType indicates
10207   // that deletion is required.
10208   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10209                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10210 
10211   // Emit the function exit block.
10212   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10213   MapperCGF.FinishFunction();
10214   UDMMap.try_emplace(D, Fn);
10215   if (CGF) {
10216     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10217     Decls.second.push_back(D);
10218   }
10219 }
10220 
10221 /// Emit the array initialization or deletion portion for user-defined mapper
10222 /// code generation. First, it evaluates whether an array section is mapped and
10223 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10224 /// true, and \a MapType indicates to not delete this array, array
10225 /// initialization code is generated. If \a IsInit is false, and \a MapType
10226 /// indicates to not this array, array deletion code is generated.
10227 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10228     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10229     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10230     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10231     bool IsInit) {
10232   StringRef Prefix = IsInit ? ".init" : ".del";
10233 
10234   // Evaluate if this is an array section.
10235   llvm::BasicBlock *BodyBB =
10236       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10237   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10238       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10239   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10240       MapType,
10241       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10242   llvm::Value *DeleteCond;
10243   llvm::Value *Cond;
10244   if (IsInit) {
10245     // base != begin?
10246     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
10247         MapperCGF.Builder.CreatePtrDiff(Base, Begin));
10248     // IsPtrAndObj?
10249     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10250         MapType,
10251         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10252     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10253     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10254     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10255     DeleteCond = MapperCGF.Builder.CreateIsNull(
10256         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10257   } else {
10258     Cond = IsArray;
10259     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10260         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10261   }
10262   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10263   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10264 
10265   MapperCGF.EmitBlock(BodyBB);
10266   // Get the array size by multiplying element size and element number (i.e., \p
10267   // Size).
10268   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10269       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10270   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10271   // memory allocation/deletion purpose only.
10272   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10273       MapType,
10274       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10275                                    MappableExprsHandler::OMP_MAP_FROM)));
10276   MapTypeArg = MapperCGF.Builder.CreateOr(
10277       MapTypeArg,
10278       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10279 
10280   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10281   // data structure.
10282   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10283                                    ArraySize, MapTypeArg, MapName};
10284   MapperCGF.EmitRuntimeCall(
10285       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10286                                             OMPRTL___tgt_push_mapper_component),
10287       OffloadingArgs);
10288 }
10289 
10290 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10291     const OMPDeclareMapperDecl *D) {
10292   auto I = UDMMap.find(D);
10293   if (I != UDMMap.end())
10294     return I->second;
10295   emitUserDefinedMapper(D);
10296   return UDMMap.lookup(D);
10297 }
10298 
10299 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10300     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10301     llvm::Value *DeviceID,
10302     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10303                                      const OMPLoopDirective &D)>
10304         SizeEmitter) {
10305   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10306   const OMPExecutableDirective *TD = &D;
10307   // Get nested teams distribute kind directive, if any.
10308   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10309     TD = getNestedDistributeDirective(CGM.getContext(), D);
10310   if (!TD)
10311     return;
10312   const auto *LD = cast<OMPLoopDirective>(TD);
10313   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10314                                                          PrePostActionTy &) {
10315     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10316       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10317       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10318       CGF.EmitRuntimeCall(
10319           OMPBuilder.getOrCreateRuntimeFunction(
10320               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10321           Args);
10322     }
10323   };
10324   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10325 }
10326 
10327 void CGOpenMPRuntime::emitTargetCall(
10328     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10329     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10330     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10331     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10332                                      const OMPLoopDirective &D)>
10333         SizeEmitter) {
10334   if (!CGF.HaveInsertPoint())
10335     return;
10336 
10337   assert(OutlinedFn && "Invalid outlined function!");
10338 
10339   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10340                                  D.hasClausesOfKind<OMPNowaitClause>();
10341   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10342   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10343   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10344                                             PrePostActionTy &) {
10345     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10346   };
10347   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10348 
10349   CodeGenFunction::OMPTargetDataInfo InputInfo;
10350   llvm::Value *MapTypesArray = nullptr;
10351   llvm::Value *MapNamesArray = nullptr;
10352   // Fill up the pointer arrays and transfer execution to the device.
10353   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10354                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10355                     &CapturedVars,
10356                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10357     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10358       // Reverse offloading is not supported, so just execute on the host.
10359       if (RequiresOuterTask) {
10360         CapturedVars.clear();
10361         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10362       }
10363       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10364       return;
10365     }
10366 
10367     // On top of the arrays that were filled up, the target offloading call
10368     // takes as arguments the device id as well as the host pointer. The host
10369     // pointer is used by the runtime library to identify the current target
10370     // region, so it only has to be unique and not necessarily point to
10371     // anything. It could be the pointer to the outlined function that
10372     // implements the target region, but we aren't using that so that the
10373     // compiler doesn't need to keep that, and could therefore inline the host
10374     // function if proven worthwhile during optimization.
10375 
10376     // From this point on, we need to have an ID of the target region defined.
10377     assert(OutlinedFnID && "Invalid outlined function ID!");
10378 
10379     // Emit device ID if any.
10380     llvm::Value *DeviceID;
10381     if (Device.getPointer()) {
10382       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10383               Device.getInt() == OMPC_DEVICE_device_num) &&
10384              "Expected device_num modifier.");
10385       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10386       DeviceID =
10387           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10388     } else {
10389       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10390     }
10391 
10392     // Emit the number of elements in the offloading arrays.
10393     llvm::Value *PointerNum =
10394         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10395 
10396     // Return value of the runtime offloading call.
10397     llvm::Value *Return;
10398 
10399     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10400     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10401 
10402     // Source location for the ident struct
10403     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10404 
10405     // Emit tripcount for the target loop-based directive.
10406     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10407 
10408     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10409     // The target region is an outlined function launched by the runtime
10410     // via calls __tgt_target() or __tgt_target_teams().
10411     //
10412     // __tgt_target() launches a target region with one team and one thread,
10413     // executing a serial region.  This master thread may in turn launch
10414     // more threads within its team upon encountering a parallel region,
10415     // however, no additional teams can be launched on the device.
10416     //
10417     // __tgt_target_teams() launches a target region with one or more teams,
10418     // each with one or more threads.  This call is required for target
10419     // constructs such as:
10420     //  'target teams'
10421     //  'target' / 'teams'
10422     //  'target teams distribute parallel for'
10423     //  'target parallel'
10424     // and so on.
10425     //
10426     // Note that on the host and CPU targets, the runtime implementation of
10427     // these calls simply call the outlined function without forking threads.
10428     // The outlined functions themselves have runtime calls to
10429     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10430     // the compiler in emitTeamsCall() and emitParallelCall().
10431     //
10432     // In contrast, on the NVPTX target, the implementation of
10433     // __tgt_target_teams() launches a GPU kernel with the requested number
10434     // of teams and threads so no additional calls to the runtime are required.
10435     if (NumTeams) {
10436       // If we have NumTeams defined this means that we have an enclosed teams
10437       // region. Therefore we also expect to have NumThreads defined. These two
10438       // values should be defined in the presence of a teams directive,
10439       // regardless of having any clauses associated. If the user is using teams
10440       // but no clauses, these two values will be the default that should be
10441       // passed to the runtime library - a 32-bit integer with the value zero.
10442       assert(NumThreads && "Thread limit expression should be available along "
10443                            "with number of teams.");
10444       SmallVector<llvm::Value *> OffloadingArgs = {
10445           RTLoc,
10446           DeviceID,
10447           OutlinedFnID,
10448           PointerNum,
10449           InputInfo.BasePointersArray.getPointer(),
10450           InputInfo.PointersArray.getPointer(),
10451           InputInfo.SizesArray.getPointer(),
10452           MapTypesArray,
10453           MapNamesArray,
10454           InputInfo.MappersArray.getPointer(),
10455           NumTeams,
10456           NumThreads};
10457       if (HasNowait) {
10458         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10459         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10460         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10461         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10462         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10463         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10464       }
10465       Return = CGF.EmitRuntimeCall(
10466           OMPBuilder.getOrCreateRuntimeFunction(
10467               CGM.getModule(), HasNowait
10468                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10469                                    : OMPRTL___tgt_target_teams_mapper),
10470           OffloadingArgs);
10471     } else {
10472       SmallVector<llvm::Value *> OffloadingArgs = {
10473           RTLoc,
10474           DeviceID,
10475           OutlinedFnID,
10476           PointerNum,
10477           InputInfo.BasePointersArray.getPointer(),
10478           InputInfo.PointersArray.getPointer(),
10479           InputInfo.SizesArray.getPointer(),
10480           MapTypesArray,
10481           MapNamesArray,
10482           InputInfo.MappersArray.getPointer()};
10483       if (HasNowait) {
10484         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10485         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10486         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10487         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10488         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10489         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10490       }
10491       Return = CGF.EmitRuntimeCall(
10492           OMPBuilder.getOrCreateRuntimeFunction(
10493               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10494                                          : OMPRTL___tgt_target_mapper),
10495           OffloadingArgs);
10496     }
10497 
10498     // Check the error code and execute the host version if required.
10499     llvm::BasicBlock *OffloadFailedBlock =
10500         CGF.createBasicBlock("omp_offload.failed");
10501     llvm::BasicBlock *OffloadContBlock =
10502         CGF.createBasicBlock("omp_offload.cont");
10503     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10504     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10505 
10506     CGF.EmitBlock(OffloadFailedBlock);
10507     if (RequiresOuterTask) {
10508       CapturedVars.clear();
10509       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10510     }
10511     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10512     CGF.EmitBranch(OffloadContBlock);
10513 
10514     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10515   };
10516 
10517   // Notify that the host version must be executed.
10518   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10519                     RequiresOuterTask](CodeGenFunction &CGF,
10520                                        PrePostActionTy &) {
10521     if (RequiresOuterTask) {
10522       CapturedVars.clear();
10523       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10524     }
10525     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10526   };
10527 
10528   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10529                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10530                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10531     // Fill up the arrays with all the captured variables.
10532     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10533 
10534     // Get mappable expression information.
10535     MappableExprsHandler MEHandler(D, CGF);
10536     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10537     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10538 
10539     auto RI = CS.getCapturedRecordDecl()->field_begin();
10540     auto *CV = CapturedVars.begin();
10541     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10542                                               CE = CS.capture_end();
10543          CI != CE; ++CI, ++RI, ++CV) {
10544       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10545       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10546 
10547       // VLA sizes are passed to the outlined region by copy and do not have map
10548       // information associated.
10549       if (CI->capturesVariableArrayType()) {
10550         CurInfo.Exprs.push_back(nullptr);
10551         CurInfo.BasePointers.push_back(*CV);
10552         CurInfo.Pointers.push_back(*CV);
10553         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10554             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10555         // Copy to the device as an argument. No need to retrieve it.
10556         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10557                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10558                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10559         CurInfo.Mappers.push_back(nullptr);
10560       } else {
10561         // If we have any information in the map clause, we use it, otherwise we
10562         // just do a default mapping.
10563         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10564         if (!CI->capturesThis())
10565           MappedVarSet.insert(CI->getCapturedVar());
10566         else
10567           MappedVarSet.insert(nullptr);
10568         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10569           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10570         // Generate correct mapping for variables captured by reference in
10571         // lambdas.
10572         if (CI->capturesVariable())
10573           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10574                                                   CurInfo, LambdaPointers);
10575       }
10576       // We expect to have at least an element of information for this capture.
10577       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10578              "Non-existing map pointer for capture!");
10579       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10580              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10581              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10582              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10583              "Inconsistent map information sizes!");
10584 
10585       // If there is an entry in PartialStruct it means we have a struct with
10586       // individual members mapped. Emit an extra combined entry.
10587       if (PartialStruct.Base.isValid()) {
10588         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10589         MEHandler.emitCombinedEntry(
10590             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10591             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10592       }
10593 
10594       // We need to append the results of this capture to what we already have.
10595       CombinedInfo.append(CurInfo);
10596     }
10597     // Adjust MEMBER_OF flags for the lambdas captures.
10598     MEHandler.adjustMemberOfForLambdaCaptures(
10599         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10600         CombinedInfo.Types);
10601     // Map any list items in a map clause that were not captures because they
10602     // weren't referenced within the construct.
10603     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10604 
10605     TargetDataInfo Info;
10606     // Fill up the arrays and create the arguments.
10607     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10608     emitOffloadingArraysArgument(
10609         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10610         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10611         {/*ForEndTask=*/false});
10612 
10613     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10614     InputInfo.BasePointersArray =
10615         Address(Info.BasePointersArray, CGM.getPointerAlign());
10616     InputInfo.PointersArray =
10617         Address(Info.PointersArray, CGM.getPointerAlign());
10618     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10619     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10620     MapTypesArray = Info.MapTypesArray;
10621     MapNamesArray = Info.MapNamesArray;
10622     if (RequiresOuterTask)
10623       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10624     else
10625       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10626   };
10627 
10628   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10629                              CodeGenFunction &CGF, PrePostActionTy &) {
10630     if (RequiresOuterTask) {
10631       CodeGenFunction::OMPTargetDataInfo InputInfo;
10632       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10633     } else {
10634       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10635     }
10636   };
10637 
10638   // If we have a target function ID it means that we need to support
10639   // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
10641   // specify target triples.
10642   if (OutlinedFnID) {
10643     if (IfCond) {
10644       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10645     } else {
10646       RegionCodeGenTy ThenRCG(TargetThenGen);
10647       ThenRCG(CGF);
10648     }
10649   } else {
10650     RegionCodeGenTy ElseRCG(TargetElseGen);
10651     ElseRCG(CGF);
10652   }
10653 }
10654 
10655 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10656                                                     StringRef ParentName) {
10657   if (!S)
10658     return;
10659 
10660   // Codegen OMP target directives that offload compute to the device.
10661   bool RequiresDeviceCodegen =
10662       isa<OMPExecutableDirective>(S) &&
10663       isOpenMPTargetExecutionDirective(
10664           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10665 
10666   if (RequiresDeviceCodegen) {
10667     const auto &E = *cast<OMPExecutableDirective>(S);
10668     unsigned DeviceID;
10669     unsigned FileID;
10670     unsigned Line;
10671     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10672                              FileID, Line);
10673 
10674     // Is this a target region that should not be emitted as an entry point? If
10675     // so just signal we are done with this target region.
10676     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10677                                                             ParentName, Line))
10678       return;
10679 
10680     switch (E.getDirectiveKind()) {
10681     case OMPD_target:
10682       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10683                                                    cast<OMPTargetDirective>(E));
10684       break;
10685     case OMPD_target_parallel:
10686       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10687           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10688       break;
10689     case OMPD_target_teams:
10690       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10691           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10692       break;
10693     case OMPD_target_teams_distribute:
10694       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10695           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10696       break;
10697     case OMPD_target_teams_distribute_simd:
10698       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10699           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10700       break;
10701     case OMPD_target_parallel_for:
10702       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10703           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10704       break;
10705     case OMPD_target_parallel_for_simd:
10706       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10707           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10708       break;
10709     case OMPD_target_simd:
10710       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10711           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10712       break;
10713     case OMPD_target_teams_distribute_parallel_for:
10714       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10715           CGM, ParentName,
10716           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10717       break;
10718     case OMPD_target_teams_distribute_parallel_for_simd:
10719       CodeGenFunction::
10720           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10721               CGM, ParentName,
10722               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10723       break;
10724     case OMPD_parallel:
10725     case OMPD_for:
10726     case OMPD_parallel_for:
10727     case OMPD_parallel_master:
10728     case OMPD_parallel_sections:
10729     case OMPD_for_simd:
10730     case OMPD_parallel_for_simd:
10731     case OMPD_cancel:
10732     case OMPD_cancellation_point:
10733     case OMPD_ordered:
10734     case OMPD_threadprivate:
10735     case OMPD_allocate:
10736     case OMPD_task:
10737     case OMPD_simd:
10738     case OMPD_tile:
10739     case OMPD_unroll:
10740     case OMPD_sections:
10741     case OMPD_section:
10742     case OMPD_single:
10743     case OMPD_master:
10744     case OMPD_critical:
10745     case OMPD_taskyield:
10746     case OMPD_barrier:
10747     case OMPD_taskwait:
10748     case OMPD_taskgroup:
10749     case OMPD_atomic:
10750     case OMPD_flush:
10751     case OMPD_depobj:
10752     case OMPD_scan:
10753     case OMPD_teams:
10754     case OMPD_target_data:
10755     case OMPD_target_exit_data:
10756     case OMPD_target_enter_data:
10757     case OMPD_distribute:
10758     case OMPD_distribute_simd:
10759     case OMPD_distribute_parallel_for:
10760     case OMPD_distribute_parallel_for_simd:
10761     case OMPD_teams_distribute:
10762     case OMPD_teams_distribute_simd:
10763     case OMPD_teams_distribute_parallel_for:
10764     case OMPD_teams_distribute_parallel_for_simd:
10765     case OMPD_target_update:
10766     case OMPD_declare_simd:
10767     case OMPD_declare_variant:
10768     case OMPD_begin_declare_variant:
10769     case OMPD_end_declare_variant:
10770     case OMPD_declare_target:
10771     case OMPD_end_declare_target:
10772     case OMPD_declare_reduction:
10773     case OMPD_declare_mapper:
10774     case OMPD_taskloop:
10775     case OMPD_taskloop_simd:
10776     case OMPD_master_taskloop:
10777     case OMPD_master_taskloop_simd:
10778     case OMPD_parallel_master_taskloop:
10779     case OMPD_parallel_master_taskloop_simd:
10780     case OMPD_requires:
10781     case OMPD_metadirective:
10782     case OMPD_unknown:
10783     default:
10784       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10785     }
10786     return;
10787   }
10788 
10789   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10790     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10791       return;
10792 
10793     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10794     return;
10795   }
10796 
10797   // If this is a lambda function, look into its body.
10798   if (const auto *L = dyn_cast<LambdaExpr>(S))
10799     S = L->getBody();
10800 
10801   // Keep looking for target regions recursively.
10802   for (const Stmt *II : S->children())
10803     scanForTargetRegionsFunctions(II, ParentName);
10804 }
10805 
10806 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10807   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10808       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10809   if (!DevTy)
10810     return false;
10811   // Do not emit device_type(nohost) functions for the host.
10812   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10813     return true;
10814   // Do not emit device_type(host) functions for the device.
10815   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10816     return true;
10817   return false;
10818 }
10819 
10820 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10821   // If emitting code for the host, we do not process FD here. Instead we do
10822   // the normal code generation.
10823   if (!CGM.getLangOpts().OpenMPIsDevice) {
10824     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10825       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10826                                   CGM.getLangOpts().OpenMPIsDevice))
10827         return true;
10828     return false;
10829   }
10830 
10831   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10832   // Try to detect target regions in the function.
10833   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10834     StringRef Name = CGM.getMangledName(GD);
10835     scanForTargetRegionsFunctions(FD->getBody(), Name);
10836     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10837                                 CGM.getLangOpts().OpenMPIsDevice))
10838       return true;
10839   }
10840 
10841   // Do not to emit function if it is not marked as declare target.
10842   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10843          AlreadyEmittedTargetDecls.count(VD) == 0;
10844 }
10845 
10846 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10847   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10848                               CGM.getLangOpts().OpenMPIsDevice))
10849     return true;
10850 
10851   if (!CGM.getLangOpts().OpenMPIsDevice)
10852     return false;
10853 
10854   // Check if there are Ctors/Dtors in this declaration and look for target
10855   // regions in it. We use the complete variant to produce the kernel name
10856   // mangling.
10857   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10858   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10859     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10860       StringRef ParentName =
10861           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10862       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10863     }
10864     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10865       StringRef ParentName =
10866           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10867       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10868     }
10869   }
10870 
10871   // Do not to emit variable if it is not marked as declare target.
10872   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10873       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10874           cast<VarDecl>(GD.getDecl()));
10875   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10876       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10877        HasRequiresUnifiedSharedMemory)) {
10878     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10879     return true;
10880   }
10881   return false;
10882 }
10883 
10884 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10885                                                    llvm::Constant *Addr) {
10886   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10887       !CGM.getLangOpts().OpenMPIsDevice)
10888     return;
10889 
10890   // If we have host/nohost variables, they do not need to be registered.
10891   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10892       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10893   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10894     return;
10895 
10896   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10897       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10898   if (!Res) {
10899     if (CGM.getLangOpts().OpenMPIsDevice) {
10900       // Register non-target variables being emitted in device code (debug info
10901       // may cause this).
10902       StringRef VarName = CGM.getMangledName(VD);
10903       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10904     }
10905     return;
10906   }
10907   // Register declare target variables.
10908   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10909   StringRef VarName;
10910   CharUnits VarSize;
10911   llvm::GlobalValue::LinkageTypes Linkage;
10912 
10913   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10914       !HasRequiresUnifiedSharedMemory) {
10915     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10916     VarName = CGM.getMangledName(VD);
10917     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10918       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10919       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10920     } else {
10921       VarSize = CharUnits::Zero();
10922     }
10923     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10924     // Temp solution to prevent optimizations of the internal variables.
10925     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10926       // Do not create a "ref-variable" if the original is not also available
10927       // on the host.
10928       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10929         return;
10930       std::string RefName = getName({VarName, "ref"});
10931       if (!CGM.GetGlobalValue(RefName)) {
10932         llvm::Constant *AddrRef =
10933             getOrCreateInternalVariable(Addr->getType(), RefName);
10934         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10935         GVAddrRef->setConstant(/*Val=*/true);
10936         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10937         GVAddrRef->setInitializer(Addr);
10938         CGM.addCompilerUsedGlobal(GVAddrRef);
10939       }
10940     }
10941   } else {
10942     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10943             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10944              HasRequiresUnifiedSharedMemory)) &&
10945            "Declare target attribute must link or to with unified memory.");
10946     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10947       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10948     else
10949       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10950 
10951     if (CGM.getLangOpts().OpenMPIsDevice) {
10952       VarName = Addr->getName();
10953       Addr = nullptr;
10954     } else {
10955       VarName = getAddrOfDeclareTargetVar(VD).getName();
10956       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10957     }
10958     VarSize = CGM.getPointerSize();
10959     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10960   }
10961 
10962   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10963       VarName, Addr, VarSize, Flags, Linkage);
10964 }
10965 
10966 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10967   if (isa<FunctionDecl>(GD.getDecl()) ||
10968       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10969     return emitTargetFunctions(GD);
10970 
10971   return emitTargetGlobalVariable(GD);
10972 }
10973 
10974 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10975   for (const VarDecl *VD : DeferredGlobalVariables) {
10976     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10977         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10978     if (!Res)
10979       continue;
10980     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10981         !HasRequiresUnifiedSharedMemory) {
10982       CGM.EmitGlobal(VD);
10983     } else {
10984       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10985               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10986                HasRequiresUnifiedSharedMemory)) &&
10987              "Expected link clause or to clause with unified memory.");
10988       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10989     }
10990   }
10991 }
10992 
10993 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10994     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10995   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10996          " Expected target-based directive.");
10997 }
10998 
10999 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
11000   for (const OMPClause *Clause : D->clauselists()) {
11001     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11002       HasRequiresUnifiedSharedMemory = true;
11003     } else if (const auto *AC =
11004                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11005       switch (AC->getAtomicDefaultMemOrderKind()) {
11006       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11007         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11008         break;
11009       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11010         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11011         break;
11012       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11013         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11014         break;
11015       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11016         break;
11017       }
11018     }
11019   }
11020 }
11021 
11022 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11023   return RequiresAtomicOrdering;
11024 }
11025 
11026 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11027                                                        LangAS &AS) {
11028   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11029     return false;
11030   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11031   switch(A->getAllocatorType()) {
11032   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11033   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11034   // Not supported, fallback to the default mem space.
11035   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11036   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11037   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11038   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11039   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11040   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11041   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11042     AS = LangAS::Default;
11043     return true;
11044   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11045     llvm_unreachable("Expected predefined allocator for the variables with the "
11046                      "static storage.");
11047   }
11048   return false;
11049 }
11050 
11051 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
11052   return HasRequiresUnifiedSharedMemory;
11053 }
11054 
11055 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11056     CodeGenModule &CGM)
11057     : CGM(CGM) {
11058   if (CGM.getLangOpts().OpenMPIsDevice) {
11059     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11060     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11061   }
11062 }
11063 
11064 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11065   if (CGM.getLangOpts().OpenMPIsDevice)
11066     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11067 }
11068 
11069 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11070   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11071     return true;
11072 
11073   const auto *D = cast<FunctionDecl>(GD.getDecl());
11074   // Do not to emit function if it is marked as declare target as it was already
11075   // emitted.
11076   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11077     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11078       if (auto *F = dyn_cast_or_null<llvm::Function>(
11079               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11080         return !F->isDeclaration();
11081       return false;
11082     }
11083     return true;
11084   }
11085 
11086   return !AlreadyEmittedTargetDecls.insert(D).second;
11087 }
11088 
11089 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11090   // If we don't have entries or if we are emitting code for the device, we
11091   // don't need to do anything.
11092   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11093       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11094       (OffloadEntriesInfoManager.empty() &&
11095        !HasEmittedDeclareTargetRegion &&
11096        !HasEmittedTargetRegion))
11097     return nullptr;
11098 
11099   // Create and register the function that handles the requires directives.
11100   ASTContext &C = CGM.getContext();
11101 
11102   llvm::Function *RequiresRegFn;
11103   {
11104     CodeGenFunction CGF(CGM);
11105     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11106     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11107     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11108     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11109     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11110     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11111     // TODO: check for other requires clauses.
11112     // The requires directive takes effect only when a target region is
11113     // present in the compilation unit. Otherwise it is ignored and not
11114     // passed to the runtime. This avoids the runtime from throwing an error
11115     // for mismatching requires clauses across compilation units that don't
11116     // contain at least 1 target region.
11117     assert((HasEmittedTargetRegion ||
11118             HasEmittedDeclareTargetRegion ||
11119             !OffloadEntriesInfoManager.empty()) &&
11120            "Target or declare target region expected.");
11121     if (HasRequiresUnifiedSharedMemory)
11122       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11123     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11124                             CGM.getModule(), OMPRTL___tgt_register_requires),
11125                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11126     CGF.FinishFunction();
11127   }
11128   return RequiresRegFn;
11129 }
11130 
11131 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11132                                     const OMPExecutableDirective &D,
11133                                     SourceLocation Loc,
11134                                     llvm::Function *OutlinedFn,
11135                                     ArrayRef<llvm::Value *> CapturedVars) {
11136   if (!CGF.HaveInsertPoint())
11137     return;
11138 
11139   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11140   CodeGenFunction::RunCleanupsScope Scope(CGF);
11141 
11142   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11143   llvm::Value *Args[] = {
11144       RTLoc,
11145       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11146       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11147   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11148   RealArgs.append(std::begin(Args), std::end(Args));
11149   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11150 
11151   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11152       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11153   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11154 }
11155 
11156 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11157                                          const Expr *NumTeams,
11158                                          const Expr *ThreadLimit,
11159                                          SourceLocation Loc) {
11160   if (!CGF.HaveInsertPoint())
11161     return;
11162 
11163   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11164 
11165   llvm::Value *NumTeamsVal =
11166       NumTeams
11167           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11168                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11169           : CGF.Builder.getInt32(0);
11170 
11171   llvm::Value *ThreadLimitVal =
11172       ThreadLimit
11173           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11174                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11175           : CGF.Builder.getInt32(0);
11176 
11177   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11178   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11179                                      ThreadLimitVal};
11180   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11181                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11182                       PushNumTeamsArgs);
11183 }
11184 
11185 void CGOpenMPRuntime::emitTargetDataCalls(
11186     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11187     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11188   if (!CGF.HaveInsertPoint())
11189     return;
11190 
11191   // Action used to replace the default codegen action and turn privatization
11192   // off.
11193   PrePostActionTy NoPrivAction;
11194 
11195   // Generate the code for the opening of the data environment. Capture all the
11196   // arguments of the runtime call by reference because they are used in the
11197   // closing of the region.
11198   auto &&BeginThenGen = [this, &D, Device, &Info,
11199                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11200     // Fill up the arrays with all the mapped variables.
11201     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11202 
11203     // Get map clause information.
11204     MappableExprsHandler MEHandler(D, CGF);
11205     MEHandler.generateAllInfo(CombinedInfo);
11206 
11207     // Fill up the arrays and create the arguments.
11208     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11209                          /*IsNonContiguous=*/true);
11210 
11211     llvm::Value *BasePointersArrayArg = nullptr;
11212     llvm::Value *PointersArrayArg = nullptr;
11213     llvm::Value *SizesArrayArg = nullptr;
11214     llvm::Value *MapTypesArrayArg = nullptr;
11215     llvm::Value *MapNamesArrayArg = nullptr;
11216     llvm::Value *MappersArrayArg = nullptr;
11217     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11218                                  SizesArrayArg, MapTypesArrayArg,
11219                                  MapNamesArrayArg, MappersArrayArg, Info);
11220 
11221     // Emit device ID if any.
11222     llvm::Value *DeviceID = nullptr;
11223     if (Device) {
11224       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11225                                            CGF.Int64Ty, /*isSigned=*/true);
11226     } else {
11227       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11228     }
11229 
11230     // Emit the number of elements in the offloading arrays.
11231     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11232     //
11233     // Source location for the ident struct
11234     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11235 
11236     llvm::Value *OffloadingArgs[] = {RTLoc,
11237                                      DeviceID,
11238                                      PointerNum,
11239                                      BasePointersArrayArg,
11240                                      PointersArrayArg,
11241                                      SizesArrayArg,
11242                                      MapTypesArrayArg,
11243                                      MapNamesArrayArg,
11244                                      MappersArrayArg};
11245     CGF.EmitRuntimeCall(
11246         OMPBuilder.getOrCreateRuntimeFunction(
11247             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11248         OffloadingArgs);
11249 
11250     // If device pointer privatization is required, emit the body of the region
11251     // here. It will have to be duplicated: with and without privatization.
11252     if (!Info.CaptureDeviceAddrMap.empty())
11253       CodeGen(CGF);
11254   };
11255 
11256   // Generate code for the closing of the data region.
11257   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11258                                                 PrePostActionTy &) {
11259     assert(Info.isValid() && "Invalid data environment closing arguments.");
11260 
11261     llvm::Value *BasePointersArrayArg = nullptr;
11262     llvm::Value *PointersArrayArg = nullptr;
11263     llvm::Value *SizesArrayArg = nullptr;
11264     llvm::Value *MapTypesArrayArg = nullptr;
11265     llvm::Value *MapNamesArrayArg = nullptr;
11266     llvm::Value *MappersArrayArg = nullptr;
11267     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11268                                  SizesArrayArg, MapTypesArrayArg,
11269                                  MapNamesArrayArg, MappersArrayArg, Info,
11270                                  {/*ForEndCall=*/true});
11271 
11272     // Emit device ID if any.
11273     llvm::Value *DeviceID = nullptr;
11274     if (Device) {
11275       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11276                                            CGF.Int64Ty, /*isSigned=*/true);
11277     } else {
11278       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11279     }
11280 
11281     // Emit the number of elements in the offloading arrays.
11282     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11283 
11284     // Source location for the ident struct
11285     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11286 
11287     llvm::Value *OffloadingArgs[] = {RTLoc,
11288                                      DeviceID,
11289                                      PointerNum,
11290                                      BasePointersArrayArg,
11291                                      PointersArrayArg,
11292                                      SizesArrayArg,
11293                                      MapTypesArrayArg,
11294                                      MapNamesArrayArg,
11295                                      MappersArrayArg};
11296     CGF.EmitRuntimeCall(
11297         OMPBuilder.getOrCreateRuntimeFunction(
11298             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11299         OffloadingArgs);
11300   };
11301 
11302   // If we need device pointer privatization, we need to emit the body of the
11303   // region with no privatization in the 'else' branch of the conditional.
11304   // Otherwise, we don't have to do anything.
11305   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11306                                                          PrePostActionTy &) {
11307     if (!Info.CaptureDeviceAddrMap.empty()) {
11308       CodeGen.setAction(NoPrivAction);
11309       CodeGen(CGF);
11310     }
11311   };
11312 
11313   // We don't have to do anything to close the region if the if clause evaluates
11314   // to false.
11315   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11316 
11317   if (IfCond) {
11318     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11319   } else {
11320     RegionCodeGenTy RCG(BeginThenGen);
11321     RCG(CGF);
11322   }
11323 
11324   // If we don't require privatization of device pointers, we emit the body in
11325   // between the runtime calls. This avoids duplicating the body code.
11326   if (Info.CaptureDeviceAddrMap.empty()) {
11327     CodeGen.setAction(NoPrivAction);
11328     CodeGen(CGF);
11329   }
11330 
11331   if (IfCond) {
11332     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11333   } else {
11334     RegionCodeGenTy RCG(EndThenGen);
11335     RCG(CGF);
11336   }
11337 }
11338 
/// Emit the runtime call for a stand-alone target data directive
/// ('target enter data', 'target exit data' or 'target update').
///
/// \param CGF Function in which the code is emitted; nothing is emitted if it
/// has no insert point.
/// \param D The stand-alone directive. Its kind selects the runtime entry
/// point, and its 'nowait'/'depend' clauses select the task-based path.
/// \param IfCond Optional 'if' clause condition. When present, the call is
/// emitted on the 'then' branch only and the 'else' branch is empty.
/// \param Device Optional 'device' clause expression. When null,
/// OMP_DEVICEID_UNDEF is passed to the runtime instead.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared by the two generators below: TargetThenGen assigns these
  // before handing ThenGen over for emission, and ThenGen reads them when it
  // builds the argument list of the runtime call.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the runtime call for the stand-alone directive.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument order: location, device id, item count, then the base
    // pointers, pointers, sizes, map types, map names and mappers arrays.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive. The 'nowait' clause switches to the *_nowait_mapper variant.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // No other directive kind is valid here; all of the cases below share the
    // unreachable default.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays from the directive's map information, publish
  // them through InputInfo/MapTypesArray/MapNamesArray, and then emit ThenGen
  // either inside a target task or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // A 'depend' or 'nowait' clause routes the call through the task-based
    // emission path below.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Hand the array pointers over to ThenGen through the shared state.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the call is guarded by the condition and nothing is
  // emitted on the 'else' branch; otherwise it is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11518 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// How each kind is mangled into the vector-variant name:
  ///  - LinearWithVarStride: 's' (x86) or "ls" (AArch64) followed by
  ///    StrideOrArg.
  ///  - Linear: 'l', followed by StrideOrArg unless it equals 1.
  ///  - Uniform: 'u'.
  ///  - Vector: 'v'.
  ///
  /// LinearWithVarStride is the first enumerator, so a value-initialized
  /// ParamKindTy (e.g. `ParamKindTy{}` or a braced `{}` argument) is
  /// LinearWithVarStride, not Vector.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; parameters not mentioned in any
    /// clause keep the default, Vector.
    ParamKindTy Kind = Vector;
    /// Value printed after the linear marker in the mangled name.
    /// NOTE(review): for LinearWithVarStride this presumably identifies the
    /// parameter holding the stride rather than a stride value - confirm in
    /// emitDeclareSimdFunction.
    llvm::APSInt StrideOrArg;
    /// Alignment printed after 'a' in the mangled name; omitted when zero.
    llvm::APSInt Alignment;
  };
} // namespace
11529 
11530 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11531                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11532   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11533   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11534   // of that clause. The VLEN value must be power of 2.
11535   // In other case the notion of the function`s "characteristic data type" (CDT)
11536   // is used to compute the vector length.
11537   // CDT is defined in the following order:
11538   //   a) For non-void function, the CDT is the return type.
11539   //   b) If the function has any non-uniform, non-linear parameters, then the
11540   //   CDT is the type of the first such parameter.
11541   //   c) If the CDT determined by a) or b) above is struct, union, or class
11542   //   type which is pass-by-value (except for the type that maps to the
11543   //   built-in complex data type), the characteristic data type is int.
11544   //   d) If none of the above three cases is applicable, the CDT is int.
11545   // The VLEN is then determined based on the CDT and the size of vector
11546   // register of that ISA for which current vector version is generated. The
11547   // VLEN is computed using the formula below:
11548   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11549   // where vector register size specified in section 3.2.1 Registers and the
11550   // Stack Frame of original AMD64 ABI document.
11551   QualType RetType = FD->getReturnType();
11552   if (RetType.isNull())
11553     return 0;
11554   ASTContext &C = FD->getASTContext();
11555   QualType CDT;
11556   if (!RetType.isNull() && !RetType->isVoidType()) {
11557     CDT = RetType;
11558   } else {
11559     unsigned Offset = 0;
11560     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11561       if (ParamAttrs[Offset].Kind == Vector)
11562         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11563       ++Offset;
11564     }
11565     if (CDT.isNull()) {
11566       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11567         if (ParamAttrs[I + Offset].Kind == Vector) {
11568           CDT = FD->getParamDecl(I)->getType();
11569           break;
11570         }
11571       }
11572     }
11573   }
11574   if (CDT.isNull())
11575     CDT = C.IntTy;
11576   CDT = CDT->getCanonicalTypeUnqualified();
11577   if (CDT->isRecordType() || CDT->isUnionType())
11578     CDT = C.IntTy;
11579   return C.getTypeSize(CDT);
11580 }
11581 
11582 static void
11583 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11584                            const llvm::APSInt &VLENVal,
11585                            ArrayRef<ParamAttrTy> ParamAttrs,
11586                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11587   struct ISADataTy {
11588     char ISA;
11589     unsigned VecRegSize;
11590   };
11591   ISADataTy ISAData[] = {
11592       {
11593           'b', 128
11594       }, // SSE
11595       {
11596           'c', 256
11597       }, // AVX
11598       {
11599           'd', 256
11600       }, // AVX2
11601       {
11602           'e', 512
11603       }, // AVX512
11604   };
11605   llvm::SmallVector<char, 2> Masked;
11606   switch (State) {
11607   case OMPDeclareSimdDeclAttr::BS_Undefined:
11608     Masked.push_back('N');
11609     Masked.push_back('M');
11610     break;
11611   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11612     Masked.push_back('N');
11613     break;
11614   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11615     Masked.push_back('M');
11616     break;
11617   }
11618   for (char Mask : Masked) {
11619     for (const ISADataTy &Data : ISAData) {
11620       SmallString<256> Buffer;
11621       llvm::raw_svector_ostream Out(Buffer);
11622       Out << "_ZGV" << Data.ISA << Mask;
11623       if (!VLENVal) {
11624         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11625         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11626         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11627       } else {
11628         Out << VLENVal;
11629       }
11630       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11631         switch (ParamAttr.Kind){
11632         case LinearWithVarStride:
11633           Out << 's' << ParamAttr.StrideOrArg;
11634           break;
11635         case Linear:
11636           Out << 'l';
11637           if (ParamAttr.StrideOrArg != 1)
11638             Out << ParamAttr.StrideOrArg;
11639           break;
11640         case Uniform:
11641           Out << 'u';
11642           break;
11643         case Vector:
11644           Out << 'v';
11645           break;
11646         }
11647         if (!!ParamAttr.Alignment)
11648           Out << 'a' << ParamAttr.Alignment;
11649       }
11650       Out << '_' << Fn->getName();
11651       Fn->addFnAttr(Out.str());
11652     }
11653   }
11654 }
11655 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11661 
11662 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11663 ///
11664 /// TODO: Need to implement the behavior for reference marked with a
11665 /// var or no linear modifiers (1.b in the section). For this, we
11666 /// need to extend ParamKindTy to support the linear modifiers.
11667 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11668   QT = QT.getCanonicalType();
11669 
11670   if (QT->isVoidType())
11671     return false;
11672 
11673   if (Kind == ParamKindTy::Uniform)
11674     return false;
11675 
11676   if (Kind == ParamKindTy::Linear)
11677     return false;
11678 
11679   // TODO: Handle linear references with modifiers
11680 
11681   if (Kind == ParamKindTy::LinearWithVarStride)
11682     return false;
11683 
11684   return true;
11685 }
11686 
11687 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11688 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11689   QT = QT.getCanonicalType();
11690   unsigned Size = C.getTypeSize(QT);
11691 
11692   // Only scalars and complex within 16 bytes wide set PVB to true.
11693   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11694     return false;
11695 
11696   if (QT->isFloatingType())
11697     return true;
11698 
11699   if (QT->isIntegerType())
11700     return true;
11701 
11702   if (QT->isPointerType())
11703     return true;
11704 
11705   // TODO: Add support for complex types (section 3.1.2, item 2).
11706 
11707   return false;
11708 }
11709 
11710 /// Computes the lane size (LS) of a return type or of an input parameter,
11711 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11712 /// TODO: Add support for references, section 3.2.1, item 1.
11713 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11714   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11715     QualType PTy = QT.getCanonicalType()->getPointeeType();
11716     if (getAArch64PBV(PTy, C))
11717       return C.getTypeSize(PTy);
11718   }
11719   if (getAArch64PBV(QT, C))
11720     return C.getTypeSize(QT);
11721 
11722   return C.getTypeSize(C.getUIntPtrType());
11723 }
11724 
11725 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11726 // signature of the scalar function, as defined in 3.2.2 of the
11727 // AAVFABI.
11728 static std::tuple<unsigned, unsigned, bool>
11729 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11730   QualType RetType = FD->getReturnType().getCanonicalType();
11731 
11732   ASTContext &C = FD->getASTContext();
11733 
11734   bool OutputBecomesInput = false;
11735 
11736   llvm::SmallVector<unsigned, 8> Sizes;
11737   if (!RetType->isVoidType()) {
11738     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11739     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11740       OutputBecomesInput = true;
11741   }
11742   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11743     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11744     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11745   }
11746 
11747   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11748   // The LS of a function parameter / return value can only be a power
11749   // of 2, starting from 8 bits, up to 128.
11750   assert(llvm::all_of(Sizes,
11751                       [](unsigned Size) {
11752                         return Size == 8 || Size == 16 || Size == 32 ||
11753                                Size == 64 || Size == 128;
11754                       }) &&
11755          "Invalid size");
11756 
11757   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11758                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11759                          OutputBecomesInput);
11760 }
11761 
11762 /// Mangle the parameter part of the vector function name according to
11763 /// their OpenMP classification. The mangling function is defined in
11764 /// section 3.5 of the AAVFABI.
11765 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11766   SmallString<256> Buffer;
11767   llvm::raw_svector_ostream Out(Buffer);
11768   for (const auto &ParamAttr : ParamAttrs) {
11769     switch (ParamAttr.Kind) {
11770     case LinearWithVarStride:
11771       Out << "ls" << ParamAttr.StrideOrArg;
11772       break;
11773     case Linear:
11774       Out << 'l';
11775       // Don't print the step value if it is not present or if it is
11776       // equal to 1.
11777       if (ParamAttr.StrideOrArg != 1)
11778         Out << ParamAttr.StrideOrArg;
11779       break;
11780     case Uniform:
11781       Out << 'u';
11782       break;
11783     case Vector:
11784       Out << 'v';
11785       break;
11786     }
11787 
11788     if (!!ParamAttr.Alignment)
11789       Out << 'a' << ParamAttr.Alignment;
11790   }
11791 
11792   return std::string(Out.str());
11793 }
11794 
11795 // Function used to add the attribute. The parameter `VLEN` is
11796 // templated to allow the use of "x" when targeting scalable functions
11797 // for SVE.
11798 template <typename T>
11799 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11800                                  char ISA, StringRef ParSeq,
11801                                  StringRef MangledName, bool OutputBecomesInput,
11802                                  llvm::Function *Fn) {
11803   SmallString<256> Buffer;
11804   llvm::raw_svector_ostream Out(Buffer);
11805   Out << Prefix << ISA << LMask << VLEN;
11806   if (OutputBecomesInput)
11807     Out << "v";
11808   Out << ParSeq << "_" << MangledName;
11809   Fn->addFnAttr(Out.str());
11810 }
11811 
11812 // Helper function to generate the Advanced SIMD names depending on
11813 // the value of the NDS when simdlen is not present.
11814 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11815                                       StringRef Prefix, char ISA,
11816                                       StringRef ParSeq, StringRef MangledName,
11817                                       bool OutputBecomesInput,
11818                                       llvm::Function *Fn) {
11819   switch (NDS) {
11820   case 8:
11821     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11822                          OutputBecomesInput, Fn);
11823     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11824                          OutputBecomesInput, Fn);
11825     break;
11826   case 16:
11827     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11828                          OutputBecomesInput, Fn);
11829     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11830                          OutputBecomesInput, Fn);
11831     break;
11832   case 32:
11833     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11834                          OutputBecomesInput, Fn);
11835     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11836                          OutputBecomesInput, Fn);
11837     break;
11838   case 64:
11839   case 128:
11840     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11841                          OutputBecomesInput, Fn);
11842     break;
11843   default:
11844     llvm_unreachable("Scalar type is too wide.");
11845   }
11846 }
11847 
11848 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11849 static void emitAArch64DeclareSimdFunction(
11850     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11851     ArrayRef<ParamAttrTy> ParamAttrs,
11852     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11853     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11854 
11855   // Get basic data for building the vector signature.
11856   const auto Data = getNDSWDS(FD, ParamAttrs);
11857   const unsigned NDS = std::get<0>(Data);
11858   const unsigned WDS = std::get<1>(Data);
11859   const bool OutputBecomesInput = std::get<2>(Data);
11860 
11861   // Check the values provided via `simdlen` by the user.
11862   // 1. A `simdlen(1)` doesn't produce vector signatures,
11863   if (UserVLEN == 1) {
11864     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11865         DiagnosticsEngine::Warning,
11866         "The clause simdlen(1) has no effect when targeting aarch64.");
11867     CGM.getDiags().Report(SLoc, DiagID);
11868     return;
11869   }
11870 
11871   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11872   // Advanced SIMD output.
11873   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11874     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11875         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11876                                     "power of 2 when targeting Advanced SIMD.");
11877     CGM.getDiags().Report(SLoc, DiagID);
11878     return;
11879   }
11880 
11881   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11882   // limits.
11883   if (ISA == 's' && UserVLEN != 0) {
11884     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11885       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11886           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11887                                       "lanes in the architectural constraints "
11888                                       "for SVE (min is 128-bit, max is "
11889                                       "2048-bit, by steps of 128-bit)");
11890       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11891       return;
11892     }
11893   }
11894 
11895   // Sort out parameter sequence.
11896   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11897   StringRef Prefix = "_ZGV";
11898   // Generate simdlen from user input (if any).
11899   if (UserVLEN) {
11900     if (ISA == 's') {
11901       // SVE generates only a masked function.
11902       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11903                            OutputBecomesInput, Fn);
11904     } else {
11905       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11906       // Advanced SIMD generates one or two functions, depending on
11907       // the `[not]inbranch` clause.
11908       switch (State) {
11909       case OMPDeclareSimdDeclAttr::BS_Undefined:
11910         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11911                              OutputBecomesInput, Fn);
11912         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11913                              OutputBecomesInput, Fn);
11914         break;
11915       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11916         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11917                              OutputBecomesInput, Fn);
11918         break;
11919       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11920         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11921                              OutputBecomesInput, Fn);
11922         break;
11923       }
11924     }
11925   } else {
11926     // If no user simdlen is provided, follow the AAVFABI rules for
11927     // generating the vector length.
11928     if (ISA == 's') {
11929       // SVE, section 3.4.1, item 1.
11930       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11931                            OutputBecomesInput, Fn);
11932     } else {
11933       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11934       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11935       // two vector names depending on the use of the clause
11936       // `[not]inbranch`.
11937       switch (State) {
11938       case OMPDeclareSimdDeclAttr::BS_Undefined:
11939         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11940                                   OutputBecomesInput, Fn);
11941         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11942                                   OutputBecomesInput, Fn);
11943         break;
11944       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11945         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11946                                   OutputBecomesInput, Fn);
11947         break;
11948       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11949         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11950                                   OutputBecomesInput, Fn);
11951         break;
11952       }
11953     }
11954   }
11955 }
11956 
11957 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11958                                               llvm::Function *Fn) {
11959   ASTContext &C = CGM.getContext();
11960   FD = FD->getMostRecentDecl();
11961   // Map params to their positions in function decl.
11962   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11963   if (isa<CXXMethodDecl>(FD))
11964     ParamPositions.try_emplace(FD, 0);
11965   unsigned ParamPos = ParamPositions.size();
11966   for (const ParmVarDecl *P : FD->parameters()) {
11967     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11968     ++ParamPos;
11969   }
11970   while (FD) {
11971     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11972       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11973       // Mark uniform parameters.
11974       for (const Expr *E : Attr->uniforms()) {
11975         E = E->IgnoreParenImpCasts();
11976         unsigned Pos;
11977         if (isa<CXXThisExpr>(E)) {
11978           Pos = ParamPositions[FD];
11979         } else {
11980           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11981                                 ->getCanonicalDecl();
11982           Pos = ParamPositions[PVD];
11983         }
11984         ParamAttrs[Pos].Kind = Uniform;
11985       }
11986       // Get alignment info.
11987       auto NI = Attr->alignments_begin();
11988       for (const Expr *E : Attr->aligneds()) {
11989         E = E->IgnoreParenImpCasts();
11990         unsigned Pos;
11991         QualType ParmTy;
11992         if (isa<CXXThisExpr>(E)) {
11993           Pos = ParamPositions[FD];
11994           ParmTy = E->getType();
11995         } else {
11996           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11997                                 ->getCanonicalDecl();
11998           Pos = ParamPositions[PVD];
11999           ParmTy = PVD->getType();
12000         }
12001         ParamAttrs[Pos].Alignment =
12002             (*NI)
12003                 ? (*NI)->EvaluateKnownConstInt(C)
12004                 : llvm::APSInt::getUnsigned(
12005                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12006                           .getQuantity());
12007         ++NI;
12008       }
12009       // Mark linear parameters.
12010       auto SI = Attr->steps_begin();
12011       auto MI = Attr->modifiers_begin();
12012       for (const Expr *E : Attr->linears()) {
12013         E = E->IgnoreParenImpCasts();
12014         unsigned Pos;
12015         // Rescaling factor needed to compute the linear parameter
12016         // value in the mangled name.
12017         unsigned PtrRescalingFactor = 1;
12018         if (isa<CXXThisExpr>(E)) {
12019           Pos = ParamPositions[FD];
12020         } else {
12021           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12022                                 ->getCanonicalDecl();
12023           Pos = ParamPositions[PVD];
12024           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12025             PtrRescalingFactor = CGM.getContext()
12026                                      .getTypeSizeInChars(P->getPointeeType())
12027                                      .getQuantity();
12028         }
12029         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12030         ParamAttr.Kind = Linear;
12031         // Assuming a stride of 1, for `linear` without modifiers.
12032         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12033         if (*SI) {
12034           Expr::EvalResult Result;
12035           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12036             if (const auto *DRE =
12037                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12038               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12039                 ParamAttr.Kind = LinearWithVarStride;
12040                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12041                     ParamPositions[StridePVD->getCanonicalDecl()]);
12042               }
12043             }
12044           } else {
12045             ParamAttr.StrideOrArg = Result.Val.getInt();
12046           }
12047         }
12048         // If we are using a linear clause on a pointer, we need to
12049         // rescale the value of linear_step with the byte size of the
12050         // pointee type.
12051         if (Linear == ParamAttr.Kind)
12052           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12053         ++SI;
12054         ++MI;
12055       }
12056       llvm::APSInt VLENVal;
12057       SourceLocation ExprLoc;
12058       const Expr *VLENExpr = Attr->getSimdlen();
12059       if (VLENExpr) {
12060         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12061         ExprLoc = VLENExpr->getExprLoc();
12062       }
12063       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12064       if (CGM.getTriple().isX86()) {
12065         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12066       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12067         unsigned VLEN = VLENVal.getExtValue();
12068         StringRef MangledName = Fn->getName();
12069         if (CGM.getTarget().hasFeature("sve"))
12070           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12071                                          MangledName, 's', 128, Fn, ExprLoc);
12072         if (CGM.getTarget().hasFeature("neon"))
12073           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12074                                          MangledName, 'n', 128, Fn, ExprLoc);
12075       }
12076     }
12077     FD = FD->getPreviousDecl();
12078   }
12079 }
12080 
12081 namespace {
12082 /// Cleanup action for doacross support.
12083 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12084 public:
12085   static const int DoacrossFinArgs = 2;
12086 
12087 private:
12088   llvm::FunctionCallee RTLFn;
12089   llvm::Value *Args[DoacrossFinArgs];
12090 
12091 public:
12092   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12093                     ArrayRef<llvm::Value *> CallArgs)
12094       : RTLFn(RTLFn) {
12095     assert(CallArgs.size() == DoacrossFinArgs);
12096     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12097   }
12098   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12099     if (!CGF.HaveInsertPoint())
12100       return;
12101     CGF.EmitRuntimeCall(RTLFn, Args);
12102   }
12103 };
12104 } // namespace
12105 
12106 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12107                                        const OMPLoopDirective &D,
12108                                        ArrayRef<Expr *> NumIterations) {
12109   if (!CGF.HaveInsertPoint())
12110     return;
12111 
12112   ASTContext &C = CGM.getContext();
12113   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12114   RecordDecl *RD;
12115   if (KmpDimTy.isNull()) {
12116     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12117     //  kmp_int64 lo; // lower
12118     //  kmp_int64 up; // upper
12119     //  kmp_int64 st; // stride
12120     // };
12121     RD = C.buildImplicitRecord("kmp_dim");
12122     RD->startDefinition();
12123     addFieldToRecordDecl(C, RD, Int64Ty);
12124     addFieldToRecordDecl(C, RD, Int64Ty);
12125     addFieldToRecordDecl(C, RD, Int64Ty);
12126     RD->completeDefinition();
12127     KmpDimTy = C.getRecordType(RD);
12128   } else {
12129     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12130   }
12131   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12132   QualType ArrayTy =
12133       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12134 
12135   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12136   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12137   enum { LowerFD = 0, UpperFD, StrideFD };
12138   // Fill dims with data.
12139   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12140     LValue DimsLVal = CGF.MakeAddrLValue(
12141         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12142     // dims.upper = num_iterations;
12143     LValue UpperLVal = CGF.EmitLValueForField(
12144         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12145     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12146         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12147         Int64Ty, NumIterations[I]->getExprLoc());
12148     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12149     // dims.stride = 1;
12150     LValue StrideLVal = CGF.EmitLValueForField(
12151         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12152     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12153                           StrideLVal);
12154   }
12155 
12156   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12157   // kmp_int32 num_dims, struct kmp_dim * dims);
12158   llvm::Value *Args[] = {
12159       emitUpdateLocation(CGF, D.getBeginLoc()),
12160       getThreadID(CGF, D.getBeginLoc()),
12161       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12162       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12163           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12164           CGM.VoidPtrTy)};
12165 
12166   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12167       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12168   CGF.EmitRuntimeCall(RTLFn, Args);
12169   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12170       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12171   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12172       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12173   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12174                                              llvm::makeArrayRef(FiniArgs));
12175 }
12176 
12177 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12178                                           const OMPDependClause *C) {
12179   QualType Int64Ty =
12180       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12181   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12182   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12183       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12184   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12185   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12186     const Expr *CounterVal = C->getLoopData(I);
12187     assert(CounterVal);
12188     llvm::Value *CntVal = CGF.EmitScalarConversion(
12189         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12190         CounterVal->getExprLoc());
12191     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12192                           /*Volatile=*/false, Int64Ty);
12193   }
12194   llvm::Value *Args[] = {
12195       emitUpdateLocation(CGF, C->getBeginLoc()),
12196       getThreadID(CGF, C->getBeginLoc()),
12197       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12198   llvm::FunctionCallee RTLFn;
12199   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12200     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12201                                                   OMPRTL___kmpc_doacross_post);
12202   } else {
12203     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12204     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12205                                                   OMPRTL___kmpc_doacross_wait);
12206   }
12207   CGF.EmitRuntimeCall(RTLFn, Args);
12208 }
12209 
12210 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12211                                llvm::FunctionCallee Callee,
12212                                ArrayRef<llvm::Value *> Args) const {
12213   assert(Loc.isValid() && "Outlined function call location must be valid.");
12214   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12215 
12216   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12217     if (Fn->doesNotThrow()) {
12218       CGF.EmitNounwindRuntimeCall(Fn, Args);
12219       return;
12220     }
12221   }
12222   CGF.EmitRuntimeCall(Callee, Args);
12223 }
12224 
/// Emits a call to the outlined function \p OutlinedFn with arguments
/// \p Args. This implementation simply forwards everything to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12230 
12231 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12232   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12233     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12234       HasEmittedDeclareTargetRegion = true;
12235 }
12236 
/// Returns the address of the local variable \p NativeParam in \p CGF.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12242 
12243 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12244                                                    const VarDecl *VD) {
12245   if (!VD)
12246     return Address::invalid();
12247   Address UntiedAddr = Address::invalid();
12248   Address UntiedRealAddr = Address::invalid();
12249   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12250   if (It != FunctionToUntiedTaskStackMap.end()) {
12251     const UntiedLocalVarsAddressesMap &UntiedData =
12252         UntiedLocalVarsStack[It->second];
12253     auto I = UntiedData.find(VD);
12254     if (I != UntiedData.end()) {
12255       UntiedAddr = I->second.first;
12256       UntiedRealAddr = I->second.second;
12257     }
12258   }
12259   const VarDecl *CVD = VD->getCanonicalDecl();
12260   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12261     // Use the default allocation.
12262     if (!isAllocatableDecl(VD))
12263       return UntiedAddr;
12264     llvm::Value *Size;
12265     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12266     if (CVD->getType()->isVariablyModifiedType()) {
12267       Size = CGF.getTypeSize(CVD->getType());
12268       // Align the size: ((size + align - 1) / align) * align
12269       Size = CGF.Builder.CreateNUWAdd(
12270           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12271       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12272       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12273     } else {
12274       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12275       Size = CGM.getSize(Sz.alignTo(Align));
12276     }
12277     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12278     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12279     assert(AA->getAllocator() &&
12280            "Expected allocator expression for non-default allocator.");
12281     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
12282     // According to the standard, the original allocator type is a enum
12283     // (integer). Convert to pointer type, if required.
12284     Allocator = CGF.EmitScalarConversion(
12285         Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
12286         AA->getAllocator()->getExprLoc());
12287     llvm::Value *Args[] = {ThreadID, Size, Allocator};
12288 
12289     llvm::Value *Addr =
12290         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
12291                                 CGM.getModule(), OMPRTL___kmpc_alloc),
12292                             Args, getName({CVD->getName(), ".void.addr"}));
12293     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12294         CGM.getModule(), OMPRTL___kmpc_free);
12295     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12296     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12297         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12298     if (UntiedAddr.isValid())
12299       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12300 
12301     // Cleanup action for allocate support.
12302     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12303       llvm::FunctionCallee RTLFn;
12304       SourceLocation::UIntTy LocEncoding;
12305       Address Addr;
12306       const Expr *Allocator;
12307 
12308     public:
12309       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12310                            SourceLocation::UIntTy LocEncoding, Address Addr,
12311                            const Expr *Allocator)
12312           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12313             Allocator(Allocator) {}
12314       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12315         if (!CGF.HaveInsertPoint())
12316           return;
12317         llvm::Value *Args[3];
12318         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12319             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12320         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12321             Addr.getPointer(), CGF.VoidPtrTy);
12322         llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
12323         // According to the standard, the original allocator type is a enum
12324         // (integer). Convert to pointer type, if required.
12325         AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12326                                             CGF.getContext().VoidPtrTy,
12327                                             Allocator->getExprLoc());
12328         Args[2] = AllocVal;
12329 
12330         CGF.EmitRuntimeCall(RTLFn, Args);
12331       }
12332     };
12333     Address VDAddr =
12334         UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
12335     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12336         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12337         VDAddr, AA->getAllocator());
12338     if (UntiedRealAddr.isValid())
12339       if (auto *Region =
12340               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12341         Region->emitUntiedSwitch(CGF);
12342     return VDAddr;
12343   }
12344   return UntiedAddr;
12345 }
12346 
12347 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12348                                              const VarDecl *VD) const {
12349   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12350   if (It == FunctionToUntiedTaskStackMap.end())
12351     return false;
12352   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12353 }
12354 
12355 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12356     CodeGenModule &CGM, const OMPLoopDirective &S)
12357     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12358   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12359   if (!NeedToPush)
12360     return;
12361   NontemporalDeclsSet &DS =
12362       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12363   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12364     for (const Stmt *Ref : C->private_refs()) {
12365       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12366       const ValueDecl *VD;
12367       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12368         VD = DRE->getDecl();
12369       } else {
12370         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12371         assert((ME->isImplicitCXXThis() ||
12372                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12373                "Expected member of current class.");
12374         VD = ME->getMemberDecl();
12375       }
12376       DS.insert(VD);
12377     }
12378   }
12379 }
12380 
12381 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12382   if (!NeedToPush)
12383     return;
12384   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12385 }
12386 
12387 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12388     CodeGenFunction &CGF,
12389     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12390                           std::pair<Address, Address>> &LocalVars)
12391     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12392   if (!NeedToPush)
12393     return;
12394   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12395       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12396   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12397 }
12398 
12399 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12400   if (!NeedToPush)
12401     return;
12402   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12403 }
12404 
12405 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12406   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12407 
12408   return llvm::any_of(
12409       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12410       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12411 }
12412 
12413 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12414     const OMPExecutableDirective &S,
12415     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12416     const {
12417   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12418   // Vars in target/task regions must be excluded completely.
12419   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12420       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12421     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12422     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12423     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12424     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12425       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12426         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12427     }
12428   }
12429   // Exclude vars in private clauses.
12430   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12431     for (const Expr *Ref : C->varlists()) {
12432       if (!Ref->getType()->isScalarType())
12433         continue;
12434       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12435       if (!DRE)
12436         continue;
12437       NeedToCheckForLPCs.insert(DRE->getDecl());
12438     }
12439   }
12440   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12441     for (const Expr *Ref : C->varlists()) {
12442       if (!Ref->getType()->isScalarType())
12443         continue;
12444       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12445       if (!DRE)
12446         continue;
12447       NeedToCheckForLPCs.insert(DRE->getDecl());
12448     }
12449   }
12450   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12451     for (const Expr *Ref : C->varlists()) {
12452       if (!Ref->getType()->isScalarType())
12453         continue;
12454       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12455       if (!DRE)
12456         continue;
12457       NeedToCheckForLPCs.insert(DRE->getDecl());
12458     }
12459   }
12460   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12461     for (const Expr *Ref : C->varlists()) {
12462       if (!Ref->getType()->isScalarType())
12463         continue;
12464       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12465       if (!DRE)
12466         continue;
12467       NeedToCheckForLPCs.insert(DRE->getDecl());
12468     }
12469   }
12470   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12471     for (const Expr *Ref : C->varlists()) {
12472       if (!Ref->getType()->isScalarType())
12473         continue;
12474       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12475       if (!DRE)
12476         continue;
12477       NeedToCheckForLPCs.insert(DRE->getDecl());
12478     }
12479   }
12480   for (const Decl *VD : NeedToCheckForLPCs) {
12481     for (const LastprivateConditionalData &Data :
12482          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12483       if (Data.DeclToUniqueName.count(VD) > 0) {
12484         if (!Data.Disabled)
12485           NeedToAddForLPCsAsDisabled.insert(VD);
12486         break;
12487       }
12488     }
12489   }
12490 }
12491 
12492 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12493     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12494     : CGM(CGF.CGM),
12495       Action((CGM.getLangOpts().OpenMP >= 50 &&
12496               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12497                            [](const OMPLastprivateClause *C) {
12498                              return C->getKind() ==
12499                                     OMPC_LASTPRIVATE_conditional;
12500                            }))
12501                  ? ActionToDo::PushAsLastprivateConditional
12502                  : ActionToDo::DoNotPush) {
12503   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12504   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12505     return;
12506   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12507          "Expected a push action.");
12508   LastprivateConditionalData &Data =
12509       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12510   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12511     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12512       continue;
12513 
12514     for (const Expr *Ref : C->varlists()) {
12515       Data.DeclToUniqueName.insert(std::make_pair(
12516           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12517           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12518     }
12519   }
12520   Data.IVLVal = IVLVal;
12521   Data.Fn = CGF.CurFn;
12522 }
12523 
12524 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12525     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12526     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12527   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12528   if (CGM.getLangOpts().OpenMP < 50)
12529     return;
12530   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12531   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12532   if (!NeedToAddForLPCsAsDisabled.empty()) {
12533     Action = ActionToDo::DisableLastprivateConditional;
12534     LastprivateConditionalData &Data =
12535         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12536     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12537       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12538     Data.Fn = CGF.CurFn;
12539     Data.Disabled = true;
12540   }
12541 }
12542 
/// Builds a LastprivateConditionalRAII through the two-argument
/// (CGF, S) constructor and returns it.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12548 
12549 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12550   if (CGM.getLangOpts().OpenMP < 50)
12551     return;
12552   if (Action == ActionToDo::DisableLastprivateConditional) {
12553     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12554            "Expected list of disabled private vars.");
12555     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12556   }
12557   if (Action == ActionToDo::PushAsLastprivateConditional) {
12558     assert(
12559         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12560         "Expected list of lastprivate conditional vars.");
12561     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12562   }
12563 }
12564 
12565 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12566                                                         const VarDecl *VD) {
12567   ASTContext &C = CGM.getContext();
12568   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12569   if (I == LastprivateConditionalToTypes.end())
12570     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12571   QualType NewType;
12572   const FieldDecl *VDField;
12573   const FieldDecl *FiredField;
12574   LValue BaseLVal;
12575   auto VI = I->getSecond().find(VD);
12576   if (VI == I->getSecond().end()) {
12577     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12578     RD->startDefinition();
12579     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12580     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12581     RD->completeDefinition();
12582     NewType = C.getRecordType(RD);
12583     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12584     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12585     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12586   } else {
12587     NewType = std::get<0>(VI->getSecond());
12588     VDField = std::get<1>(VI->getSecond());
12589     FiredField = std::get<2>(VI->getSecond());
12590     BaseLVal = std::get<3>(VI->getSecond());
12591   }
12592   LValue FiredLVal =
12593       CGF.EmitLValueForField(BaseLVal, FiredField);
12594   CGF.EmitStoreOfScalar(
12595       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12596       FiredLVal);
12597   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12598 }
12599 
12600 namespace {
12601 /// Checks if the lastprivate conditional variable is referenced in LHS.
12602 class LastprivateConditionalRefChecker final
12603     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12604   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12605   const Expr *FoundE = nullptr;
12606   const Decl *FoundD = nullptr;
12607   StringRef UniqueDeclName;
12608   LValue IVLVal;
12609   llvm::Function *FoundFn = nullptr;
12610   SourceLocation Loc;
12611 
12612 public:
12613   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12614     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12615          llvm::reverse(LPM)) {
12616       auto It = D.DeclToUniqueName.find(E->getDecl());
12617       if (It == D.DeclToUniqueName.end())
12618         continue;
12619       if (D.Disabled)
12620         return false;
12621       FoundE = E;
12622       FoundD = E->getDecl()->getCanonicalDecl();
12623       UniqueDeclName = It->second;
12624       IVLVal = D.IVLVal;
12625       FoundFn = D.Fn;
12626       break;
12627     }
12628     return FoundE == E;
12629   }
12630   bool VisitMemberExpr(const MemberExpr *E) {
12631     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12632       return false;
12633     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12634          llvm::reverse(LPM)) {
12635       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12636       if (It == D.DeclToUniqueName.end())
12637         continue;
12638       if (D.Disabled)
12639         return false;
12640       FoundE = E;
12641       FoundD = E->getMemberDecl()->getCanonicalDecl();
12642       UniqueDeclName = It->second;
12643       IVLVal = D.IVLVal;
12644       FoundFn = D.Fn;
12645       break;
12646     }
12647     return FoundE == E;
12648   }
12649   bool VisitStmt(const Stmt *S) {
12650     for (const Stmt *Child : S->children()) {
12651       if (!Child)
12652         continue;
12653       if (const auto *E = dyn_cast<Expr>(Child))
12654         if (!E->isGLValue())
12655           continue;
12656       if (Visit(Child))
12657         return true;
12658     }
12659     return false;
12660   }
12661   explicit LastprivateConditionalRefChecker(
12662       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12663       : LPM(LPM) {}
12664   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12665   getFoundData() const {
12666     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12667   }
12668 };
12669 } // namespace
12670 
12671 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12672                                                        LValue IVLVal,
12673                                                        StringRef UniqueDeclName,
12674                                                        LValue LVal,
12675                                                        SourceLocation Loc) {
12676   // Last updated loop counter for the lastprivate conditional var.
12677   // int<xx> last_iv = 0;
12678   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12679   llvm::Constant *LastIV =
12680       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12681   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12682       IVLVal.getAlignment().getAsAlign());
12683   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12684 
12685   // Last value of the lastprivate conditional.
12686   // decltype(priv_a) last_a;
12687   llvm::Constant *Last = getOrCreateInternalVariable(
12688       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12689   cast<llvm::GlobalVariable>(Last)->setAlignment(
12690       LVal.getAlignment().getAsAlign());
12691   LValue LastLVal =
12692       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12693 
12694   // Global loop counter. Required to handle inner parallel-for regions.
12695   // iv
12696   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12697 
12698   // #pragma omp critical(a)
12699   // if (last_iv <= iv) {
12700   //   last_iv = iv;
12701   //   last_a = priv_a;
12702   // }
12703   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12704                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12705     Action.Enter(CGF);
12706     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12707     // (last_iv <= iv) ? Check if the variable is updated and store new
12708     // value in global var.
12709     llvm::Value *CmpRes;
12710     if (IVLVal.getType()->isSignedIntegerType()) {
12711       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12712     } else {
12713       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12714              "Loop iteration variable must be integer.");
12715       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12716     }
12717     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12718     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12719     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12720     // {
12721     CGF.EmitBlock(ThenBB);
12722 
12723     //   last_iv = iv;
12724     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12725 
12726     //   last_a = priv_a;
12727     switch (CGF.getEvaluationKind(LVal.getType())) {
12728     case TEK_Scalar: {
12729       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12730       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12731       break;
12732     }
12733     case TEK_Complex: {
12734       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12735       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12736       break;
12737     }
12738     case TEK_Aggregate:
12739       llvm_unreachable(
12740           "Aggregates are not supported in lastprivate conditional.");
12741     }
12742     // }
12743     CGF.EmitBranch(ExitBB);
12744     // There is no need to emit line number for unconditional branch.
12745     (void)ApplyDebugLocation::CreateEmpty(CGF);
12746     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12747   };
12748 
12749   if (CGM.getLangOpts().OpenMPSimd) {
12750     // Do not emit as a critical region as no parallel region could be emitted.
12751     RegionCodeGenTy ThenRCG(CodeGen);
12752     ThenRCG(CGF);
12753   } else {
12754     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12755   }
12756 }
12757 
12758 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12759                                                          const Expr *LHS) {
12760   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12761     return;
12762   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12763   if (!Checker.Visit(LHS))
12764     return;
12765   const Expr *FoundE;
12766   const Decl *FoundD;
12767   StringRef UniqueDeclName;
12768   LValue IVLVal;
12769   llvm::Function *FoundFn;
12770   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12771       Checker.getFoundData();
12772   if (FoundFn != CGF.CurFn) {
12773     // Special codegen for inner parallel regions.
12774     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12775     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12776     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12777            "Lastprivate conditional is not found in outer region.");
12778     QualType StructTy = std::get<0>(It->getSecond());
12779     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12780     LValue PrivLVal = CGF.EmitLValue(FoundE);
12781     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12782         PrivLVal.getAddress(CGF),
12783         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12784     LValue BaseLVal =
12785         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12786     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12787     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12788                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12789                         FiredLVal, llvm::AtomicOrdering::Unordered,
12790                         /*IsVolatile=*/true, /*isInit=*/false);
12791     return;
12792   }
12793 
12794   // Private address of the lastprivate conditional in the current context.
12795   // priv_a
12796   LValue LVal = CGF.EmitLValue(FoundE);
12797   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12798                                    FoundE->getExprLoc());
12799 }
12800 
12801 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12802     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12803     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12804   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12805     return;
12806   auto Range = llvm::reverse(LastprivateConditionalStack);
12807   auto It = llvm::find_if(
12808       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12809   if (It == Range.end() || It->Fn != CGF.CurFn)
12810     return;
12811   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12812   assert(LPCI != LastprivateConditionalToTypes.end() &&
12813          "Lastprivates must be registered already.");
12814   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12815   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12816   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12817   for (const auto &Pair : It->DeclToUniqueName) {
12818     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12819     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12820       continue;
12821     auto I = LPCI->getSecond().find(Pair.first);
12822     assert(I != LPCI->getSecond().end() &&
12823            "Lastprivate must be rehistered already.");
12824     // bool Cmp = priv_a.Fired != 0;
12825     LValue BaseLVal = std::get<3>(I->getSecond());
12826     LValue FiredLVal =
12827         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12828     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12829     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12830     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12831     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12832     // if (Cmp) {
12833     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12834     CGF.EmitBlock(ThenBB);
12835     Address Addr = CGF.GetAddrOfLocalVar(VD);
12836     LValue LVal;
12837     if (VD->getType()->isReferenceType())
12838       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12839                                            AlignmentSource::Decl);
12840     else
12841       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12842                                 AlignmentSource::Decl);
12843     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12844                                      D.getBeginLoc());
12845     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12846     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12847     // }
12848   }
12849 }
12850 
12851 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12852     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12853     SourceLocation Loc) {
12854   if (CGF.getLangOpts().OpenMP < 50)
12855     return;
12856   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12857   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12858          "Unknown lastprivate conditional variable.");
12859   StringRef UniqueName = It->second;
12860   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12861   // The variable was not updated in the region - exit.
12862   if (!GV)
12863     return;
12864   LValue LPLVal = CGF.MakeAddrLValue(
12865       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12866   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12867   CGF.EmitStoreOfScalar(Res, PrivLVal);
12868 }
12869 
/// SIMD-only stub: emitParallelOutlinedFunction is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12875 
/// SIMD-only stub: emitTeamsOutlinedFunction is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12881 
/// SIMD-only stub: emitTaskOutlinedFunction is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it;
/// in particular \p NumberOfParts is never written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12889 
/// SIMD-only stub: emitParallelCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12898 
/// SIMD-only stub: emitCriticalRegion is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12905 
/// SIMD-only stub: emitMasterRegion is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12911 
/// SIMD-only stub: emitMaskedRegion is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12918 
/// SIMD-only stub: emitTaskyieldCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12923 
/// SIMD-only stub: emitTaskgroupRegion is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12929 
/// SIMD-only stub: emitSingleRegion is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12937 
/// SIMD-only stub: emitOrderedRegion is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12944 
/// SIMD-only stub: emitBarrierCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12952 
/// SIMD-only stub: emitForDispatchInit is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12959 
/// SIMD-only stub: emitForStaticInit is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12965 
/// SIMD-only stub: emitDistributeStaticInit is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12971 
/// SIMD-only stub: emitForOrderedIterationEnd is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12978 
/// SIMD-only stub: emitForStaticFinish is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12984 
/// SIMD-only stub: emitForNext is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it
/// and no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12992 
/// SIMD-only stub: emitNumThreadsClause is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12998 
/// SIMD-only stub: emitProcBindClause is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13004 
/// SIMD-only stub: getAddrOfThreadPrivate is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it
/// and no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13011 
/// SIMD-only stub: emitThreadPrivateVarDefinition is not supported in this
/// mode. The body is only an llvm_unreachable, so no call is expected to reach
/// it.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13017 
/// SIMD-only stub: getAddrOfArtificialThreadPrivate is not supported in this
/// mode. The body is only an llvm_unreachable, so no call is expected to reach
/// it.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13022 
/// SIMD-only stub: emitFlush is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13029 
/// SIMD-only stub: emitTaskCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13038 
/// SIMD-only stub: emitTaskLoopCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13045 
/// Emits a reduction in SIMD-only mode by forwarding every argument unchanged
/// to CGOpenMPRuntime::emitReduction. Asserts that Options.SimpleReduction is
/// set, since only simple reductions are expected here.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13054 
/// SIMD-only stub: emitTaskReductionInit is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13060 
/// SIMD-only stub: emitTaskReductionFini is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13066 
/// SIMD-only stub: emitTaskReductionFixups is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13073 
/// SIMD-only stub: getTaskReductionItem is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it
/// and no address is ever returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13080 
/// SIMD-only stub: emitTaskwaitCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13086 
/// SIMD-only stub: emitCancellationPointCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13092 
/// SIMD-only stub: emitCancelCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13098 
/// SIMD-only stub: emitTargetOutlinedFunction is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it;
/// in particular \p OutlinedFn and \p OutlinedFnID are never written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13105 
/// SIMD-only stub: emitTargetCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it
/// and \p SizeEmitter is never invoked.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13115 
/// SIMD-only stub: emitTargetFunctions is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it
/// and no value is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13119 
/// SIMD-only stub: emitTargetGlobalVariable is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it
/// and no value is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13123 
/// Always returns false without inspecting \p GD or emitting anything.
/// NOTE(review): presumably 'false' tells the caller that the global was not
/// handled as a target global - confirm against the base-class contract.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13127 
/// SIMD-only stub: emitTeamsCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13135 
/// SIMD-only stub: emitNumTeamsClause is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13142 
/// SIMD-only stub: emitTargetDataCalls is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it;
/// in particular \p Info is never modified.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13148 
/// SIMD-only stub: emitTargetDataStandAloneCall is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13154 
/// SIMD-only stub: emitDoacrossInit is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13160 
/// SIMD-only stub: emitDoacrossOrdered is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13165 
/// SIMD-only stub: translateParameter is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it
/// and no declaration is ever returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13171 
/// SIMD-only stub: getParameterAddress is not supported in this mode.
/// The body is only an llvm_unreachable, so no call is expected to reach it
/// and no address is ever returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13178