1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include "llvm/Support/Format.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <numeric>
43 
44 using namespace clang;
45 using namespace CodeGen;
46 using namespace llvm::omp;
47 
48 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a region with an associated captured statement
  /// (used by the outlined/task/target subclasses below).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used by the inlined
  /// region subclass).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point; no-op by default, overridden for untied
  /// task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Get the kind of this region (outlined, task, inlined or target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Get the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Return true if the construct has an associated 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: any CGCapturedStmtInfo with kind CR_OpenMP is a
  /// CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this OpenMP region.
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence for the body of the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive kind that created this region.
  OpenMPDirectiveKind Kind;
  /// True if the region has an associated 'cancel' directive.
  bool HasCancel;
};
110 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable or parameter holding the global thread id;
  /// must not be null.
  /// \param HelperName Name used for the outlined capture helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined capture helper function.
  StringRef HelperName;
};
143 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the switching machinery for untied tasks:
  /// each task part becomes a case of a switch over a part-id variable, so a
  /// resumed task jumps to the point where it previously suspended.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note the constructor inverts the flag).
    bool Untied;
    /// Variable holding the current part id of the untied task.
    const VarDecl *PartIDVar;
    /// Codegen sequence run at each suspension point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id, built lazily in Enter().
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Switch over the loaded part id; the default "done" block branches
        // straight to the function return.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes execution immediately after the switch.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a suspension point: store the next part id, run the untied
    /// codegen hook, branch out of the task, and register the continuation
    /// block as a new case of the part-id switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Store the index the continuation will be registered under, so a
        // re-entry resumes at that case.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        // The continuation block becomes the next case of the switch.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param Action Untied-task action shared with the caller; referenced,
  /// not copied, so it must outlive this region info.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate suspension-point emission to the untied task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
232 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing OpenMP region
/// info (OuterRegionInfo), when there is one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE(review): unlike the other delegating methods, this queries
    // getOldCSI() directly (the local deliberately shadows the member
    // OuterRegionInfo), so it also works when the previous captured-stmt
    // info is not an OpenMP region.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Return the captured-statement info that was active before this inlined
  /// region was entered.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
315 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Client-provided, application-unique name for the
  /// target region helper.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region, provided by the client.
  StringRef HelperName;
};
344 
/// Placeholder codegen callback that must never actually run; used where a
/// RegionCodeGenTy is required but no body emission is expected.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need
      // privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
407 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture mapping, held while the region is active when
  /// NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// If true, lambda/block capture state is hidden from the inlined region.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash and clear lambda/block capture state so the inlined region
      // does not see captures of an enclosing lambda or block.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Restore the saved lambda/block capture state.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
450 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (intentionally the same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
479 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs with special meaning to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
505 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
546 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Schedule used when no schedule clause is present.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
578 
579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580 /// region.
581 class CleanupTy final : public EHScopeStack::Cleanup {
582   PrePostActionTy *Action;
583 
584 public:
585   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587     if (!CGF.HaveInsertPoint())
588       return;
589     Action->Exit(CGF);
590   }
591 };
592 
593 } // anonymous namespace
594 
595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596   CodeGenFunction::RunCleanupsScope Scope(CGF);
597   if (PrePostAction) {
598     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599     Callback(CodeGen, CGF, *PrePostAction);
600   } else {
601     PrePostActionTy Action;
602     Callback(CodeGen, CGF, Action);
603   }
604 }
605 
606 /// Check if the combiner is a call to UDR combiner and if it is so return the
607 /// UDR decl used for reduction.
608 static const OMPDeclareReductionDecl *
609 getReductionInit(const Expr *ReductionOp) {
610   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612       if (const auto *DRE =
613               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615           return DRD;
616   return nullptr;
617 }
618 
/// Emit initialization of a private copy for a reduction.
/// If \p DRD has a user-defined initializer, emit a call to the UDR 'init'
/// function mapping \p Private and \p Original to its two arguments;
/// otherwise initialize \p Private from a null constant of type \p Ty.
/// \param InitOp Initialization expression; when a UDR initializer exists it
/// is a call through an OpaqueValueExpr callee.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Fetch the (combiner, initializer) pair emitted for this UDR; .second
    // is the initializer function used below.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // Each argument is a unary operator over a DeclRefExpr; remap those
    // decls to the private/original addresses while emitting the call.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the initializer function for the opaque callee, then emit.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a private constant global holding the
    // null value and copy/load it into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied from the global straight into Private.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex: store the loaded rvalue into Private.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
674 
/// Emit element-by-element initialization of an array of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element with the
/// user-defined reduction initializer via emitInitWithReductionInitializer;
/// otherwise emit \p Init for each element.
/// \param Init Initial expression for an element.
/// \param DRD Declare-reduction declaration, if any; when present the source
/// array is walked in lockstep with the destination.
/// \param SrcAddr Address of the original array (used only with \p DRD).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge comes from whatever block emission currently ends in.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
764 
// Emit the lvalue for the shared (original) copy of a reduction item by
// delegating to the generic OpenMP shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
768 
769 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
770                                             const Expr *E) {
771   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
772     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
773   return LValue();
774 }
775 
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when one is present, or when the
  // private copy has no initializer of its own; otherwise fall back to the
  // private variable's initializer below.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
792 
793 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
794                                    ArrayRef<const Expr *> Origs,
795                                    ArrayRef<const Expr *> Privates,
796                                    ArrayRef<const Expr *> ReductionOps) {
797   ClausesData.reserve(Shareds.size());
798   SharedAddresses.reserve(Shareds.size());
799   Sizes.reserve(Shareds.size());
800   BaseDecls.reserve(Shareds.size());
801   const auto *IOrig = Origs.begin();
802   const auto *IPriv = Privates.begin();
803   const auto *IRed = ReductionOps.begin();
804   for (const Expr *Ref : Shareds) {
805     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
806     std::advance(IOrig, 1);
807     std::advance(IPriv, 1);
808     std::advance(IRed, 1);
809   }
810 }
811 
812 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
813   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
814          "Number of generated lvalues must be exactly N.");
815   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
816   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
817   SharedAddresses.emplace_back(First, Second);
818   if (ClausesData[N].Shared == ClausesData[N].Ref) {
819     OrigAddresses.emplace_back(First, Second);
820   } else {
821     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
822     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
823     OrigAddresses.emplace_back(First, Second);
824   }
825 }
826 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  // Compute and record the size of reduction item N (in chars, and — for
  // variably-modified types — in elements), then instantiate the VLA type.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the char size is known from the type; no element
    // count is needed (second member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (upper bound - lower bound) + 1, where the bounds are
    // the first/second lvalues emitted for the original expression.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably-modified object: char size comes from the type; derive
    // the element count by dividing out the element size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA's size expression to the computed element count so that
  // EmitVariablyModifiedType can materialize the concrete type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
862 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  // Overload used when the element count was computed elsewhere and is
  // passed in; only variably-modified private types need any work here.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA's size expression to the supplied element count and emit
  // the concrete type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
881 
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  // Emit the initial value of the private copy of reduction item N,
  // dispatching between array initialization, a user-defined reduction
  // initializer, and the private variable's own initializer.
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type the raw private address to the private copy's memory type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array (including VLA) items go through element-wise aggregate init.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined reduction initializer (or no private
    // init of its own): call the UDR initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // NOTE(review): DefaultInit returning false appears to mean "not
    // handled by the caller" — confirm against call sites. In that case
    // emit the private variable's own non-trivial initializer here.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
909 
910 bool ReductionCodeGen::needCleanups(unsigned N) {
911   const auto *PrivateVD =
912       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
913   QualType PrivateType = PrivateVD->getType();
914   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
915   return DTorKind != QualType::DK_none;
916 }
917 
918 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
919                                     Address PrivateAddr) {
920   const auto *PrivateVD =
921       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922   QualType PrivateType = PrivateVD->getType();
923   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
924   if (needCleanups(N)) {
925     PrivateAddr = CGF.Builder.CreateElementBitCast(
926         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
927     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
928   }
929 }
930 
// Follow pointer/reference indirections of \p BaseTy, loading through each
// level, until the pointee type matches \p ElTy; then re-type the resulting
// address as ElTy while preserving the original lvalue's base info and TBAA.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: load the referenced address.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast the final address to ElTy's memory type without changing the
  // lvalue's logical type or TBAA information.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
950 
// Inverse companion of loadToBegin: rebuild a chain of temporaries matching
// the pointer/reference levels between \p BaseTy and \p ElTy, store \p Addr
// (cast to the innermost level's type) at the bottom, and return the
// outermost temporary. With no indirection, \p Addr is returned directly.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // temporary created one level up
  Address MostTopTmp = Address::invalid(); // outermost temporary (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each outer temp stores the
    // address of the next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address at the innermost level.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address::deprecated(Addr, BaseLVAlignment);
}
978 
979 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
980   const VarDecl *OrigVD = nullptr;
981   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
982     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
984       Base = TempOASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
990     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
992       Base = TempASE->getBase()->IgnoreParenImpCasts();
993     DE = cast<DeclRefExpr>(Base);
994     OrigVD = cast<VarDecl>(DE->getDecl());
995   }
996   return OrigVD;
997 }
998 
999 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1000                                                Address PrivateAddr) {
1001   const DeclRefExpr *DE;
1002   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1003     BaseDecls.emplace_back(OrigVD);
1004     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1005     LValue BaseLValue =
1006         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1007                     OriginalBaseLValue);
1008     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1009     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1010         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1011         SharedAddr.getPointer());
1012     llvm::Value *PrivatePointer =
1013         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1014             PrivateAddr.getPointer(), SharedAddr.getType());
1015     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1016         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1017     return castToBase(CGF, OrigVD->getType(),
1018                       SharedAddresses[N].first.getType(),
1019                       OriginalBaseLValue.getAddress(CGF).getType(),
1020                       OriginalBaseLValue.getAlignment(), Ptr);
1021   }
1022   BaseDecls.emplace_back(
1023       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1024   return PrivateAddr;
1025 }
1026 
1027 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1028   const OMPDeclareReductionDecl *DRD =
1029       getReductionInit(ClausesData[N].ReductionOp);
1030   return DRD && DRD->getInitializer();
1031 }
1032 
1033 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1034   return CGF.EmitLoadOfPointerLValue(
1035       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1036       getThreadIDVariable()->getType()->castAs<PointerType>());
1037 }
1038 
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  // Emit the region body inside a terminate scope: exceptions must not
  // escape an OpenMP structured block.
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  // Pop must match the push above; CodeGen runs strictly inside the scope.
  CGF.EHStack.popTerminate();
}
1053 
1054 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1055     CodeGenFunction &CGF) {
1056   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1057                             getThreadIDVariable()->getType(),
1058                             AlignmentSource::Decl);
1059 }
1060 
1061 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1062                                        QualType FieldTy) {
1063   auto *Field = FieldDecl::Create(
1064       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1065       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1066       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1067   Field->setAccess(AS_public);
1068   DC->addDecl(Field);
1069   return Field;
1070 }
1071 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The runtime's critical-name type is modeled as [8 x i32].
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Pick up any offload entry info recorded in module metadata.
  loadOffloadInfoMetadata();
}
1082 
1083 void CGOpenMPRuntime::clear() {
1084   InternalVars.clear();
1085   // Clean non-target variable declarations possibly used only in debug info.
1086   for (const auto &Data : EmittedNonTargetVariables) {
1087     if (!Data.getValue().pointsToAliveValue())
1088       continue;
1089     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1090     if (!GV)
1091       continue;
1092     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1093       continue;
1094     GV->eraseFromParent();
1095   }
1096 }
1097 
1098 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1099   SmallString<128> Buffer;
1100   llvm::raw_svector_ostream OS(Buffer);
1101   StringRef Sep = FirstSeparator;
1102   for (StringRef Part : Parts) {
1103     OS << Sep << Part;
1104     Sep = Separator;
1105   }
1106   return std::string(OS.str());
1107 }
1108 
// Emit the outlined helper for a user-defined reduction: either the combiner
// or the initializer, as selected by \p IsCombiner. \p CombinerInitializer
// is the expression to emit at the end (may be null for initializers whose
// work happens through \p Out's own initializer instead). \p In and \p Out
// are the omp_in/omp_out (or omp_orig/omp_priv) variables of the UDR decl.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified pointers to the reduction type.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // In optimized builds mark the helper always-inline.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer case: emit Out's own non-trivial initializer, if any.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1165 
1166 void CGOpenMPRuntime::emitUserDefinedReduction(
1167     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1168   if (UDRMap.count(D) > 0)
1169     return;
1170   llvm::Function *Combiner = emitCombinerOrInitializer(
1171       CGM, D->getType(), D->getCombiner(),
1172       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1173       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1174       /*IsCombiner=*/true);
1175   llvm::Function *Initializer = nullptr;
1176   if (const Expr *Init = D->getInitializer()) {
1177     Initializer = emitCombinerOrInitializer(
1178         CGM, D->getType(),
1179         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1180                                                                      : nullptr,
1181         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1182         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1183         /*IsCombiner=*/false);
1184   }
1185   UDRMap.try_emplace(D, Combiner, Initializer);
1186   if (CGF) {
1187     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1188     Decls.second.push_back(D);
1189   }
1190 }
1191 
1192 std::pair<llvm::Function *, llvm::Function *>
1193 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1194   auto I = UDRMap.find(D);
1195   if (I != UDRMap.end())
1196     return I->second;
1197   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1198   return UDRMap.lookup(D);
1199 }
1200 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a FinalizationInfo (carrying a cancellation-cleanup callback)
  // onto the OpenMPIRBuilder's finalization stack; pops it on destruction.
  // No-op when \p OMPBuilder is null.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    // NOTE(review): FiniCB captures CGF by reference, so this RAII object
    // (and the pushed callback) must not outlive CGF.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder; // may be null; both push and pop skip
};
} // namespace
1245 
1246 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1247     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1248     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1249     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1250   assert(ThreadIDVar->getType()->isPointerType() &&
1251          "thread id variable must be of type kmp_int32 *");
1252   CodeGenFunction CGF(CGM, true);
1253   bool HasCancel = false;
1254   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1255     HasCancel = OPD->hasCancel();
1256   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1257     HasCancel = OPD->hasCancel();
1258   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1259     HasCancel = OPSD->hasCancel();
1260   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1261     HasCancel = OPFD->hasCancel();
1262   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1263     HasCancel = OPFD->hasCancel();
1264   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1265     HasCancel = OPFD->hasCancel();
1266   else if (const auto *OPFD =
1267                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD =
1270                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272 
1273   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1274   //       parallel region to make cancellation barriers work properly.
1275   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1276   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1277   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1278                                     HasCancel, OutlinedHelperName);
1279   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1280   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1281 }
1282 
1283 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1284     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1285     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1286   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1287   return emitParallelOrTeamsOutlinedFunction(
1288       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1289 }
1290 
1291 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1292     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1294   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1295   return emitParallelOrTeamsOutlinedFunction(
1296       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1297 }
1298 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Outline the body of a task/taskloop region. For untied tasks, an action
  // re-enqueues the task via __kmpc_omp_task, and NumberOfParts is set to
  // the number of generated task parts.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    // Emit __kmpc_omp_task(loc, gtid, task_t*) to re-schedule the task.
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Pick the captured region matching the directive family.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Whether the directive (or a combined form) may contain 'cancel'.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Only untied tasks are split into multiple parts.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1345 
1346 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1347                              const RecordDecl *RD, const CGRecordLayout &RL,
1348                              ArrayRef<llvm::Constant *> Data) {
1349   llvm::StructType *StructTy = RL.getLLVMType();
1350   unsigned PrevIdx = 0;
1351   ConstantInitBuilder CIBuilder(CGM);
1352   auto DI = Data.begin();
1353   for (const FieldDecl *FD : RD->fields()) {
1354     unsigned Idx = RL.getLLVMFieldNo(FD);
1355     // Fill the alignment.
1356     for (unsigned I = PrevIdx; I < Idx; ++I)
1357       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1358     PrevIdx = Idx + 1;
1359     Fields.add(*DI);
1360     ++DI;
1361   }
1362 }
1363 
1364 template <class... As>
1365 static llvm::GlobalVariable *
1366 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1367                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1368                    As &&... Args) {
1369   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1370   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1371   ConstantInitBuilder CIBuilder(CGM);
1372   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1373   buildStructValue(Fields, CGM, RD, RL, Data);
1374   return Fields.finishAndCreateGlobal(
1375       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1376       std::forward<As>(Args)...);
1377 }
1378 
1379 template <typename T>
1380 static void
1381 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1382                                          ArrayRef<llvm::Constant *> Data,
1383                                          T &Parent) {
1384   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1385   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1386   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1387   buildStructValue(Fields, CGM, RD, RL, Data);
1388   Fields.finishAndAddTo(Parent);
1389 }
1390 
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  // Create a dummy no-op instruction (bitcast of undef i32) to serve as the
  // service insertion point for this function, anchored either at the
  // builder's current block or right after the alloca insertion point.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Append the marker to the builder's current block.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Place the marker immediately after the function's alloca insert point.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1406 
1407 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1408   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1409   if (Elem.second.ServiceInsertPt) {
1410     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1411     Elem.second.ServiceInsertPt = nullptr;
1412     Ptr->eraseFromParent();
1413   }
1414 }
1415 
1416 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1417                                                   SourceLocation Loc,
1418                                                   SmallString<128> &Buffer) {
1419   llvm::raw_svector_ostream OS(Buffer);
1420   // Build debug location
1421   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1422   OS << ";" << PLoc.getFilename() << ";";
1423   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1424     OS << FD->getQualifiedNameAsString();
1425   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1426   return OS.str();
1427 }
1428 
1429 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1430                                                  SourceLocation Loc,
1431                                                  unsigned Flags) {
1432   uint32_t SrcLocStrSize;
1433   llvm::Constant *SrcLocStr;
1434   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1435       Loc.isInvalid()) {
1436     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1437   } else {
1438     std::string FunctionName;
1439     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1440       FunctionName = FD->getQualifiedNameAsString();
1441     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1442     const char *FileName = PLoc.getFilename();
1443     unsigned Line = PLoc.getLine();
1444     unsigned Column = PLoc.getColumn();
1445     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1446                                                 Column, SrcLocStrSize);
1447   }
1448   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1449   return OMPBuilder.getOrCreateIdent(
1450       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1451 }
1452 
/// Returns the OpenMP thread id for the current function, caching it per
/// function where possible. The id comes from (in priority order): the
/// OpenMPIRBuilder, the per-function cache, the outlined region's thread-id
/// parameter, or a fresh __kmpc_global_thread_num() call emitted near entry.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reload from the parameter when it is safe: either C++ exceptions
      // are off / no landing pad is needed, or the load happens in the entry
      // block or in the same block as the parameter's defining instruction.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (near function
  // entry) so the cached value is available throughout the function; the
  // guard restores the builder's previous position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1521 
1522 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1523   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1524   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1525     clearLocThreadIdInsertPt(CGF);
1526     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1527   }
1528   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1529     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1530       UDRMap.erase(D);
1531     FunctionUDRMap.erase(CGF.CurFn);
1532   }
1533   auto I = FunctionUDMMap.find(CGF.CurFn);
1534   if (I != FunctionUDMMap.end()) {
1535     for(const auto *D : I->second)
1536       UDMMap.erase(D);
1537     FunctionUDMMap.erase(I);
1538   }
1539   LastprivateConditionalToTypes.erase(CGF.CurFn);
1540   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1541 }
1542 
/// Returns the pointer-to-ident_t type, as maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1546 
1547 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1548   if (!Kmpc_MicroTy) {
1549     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1550     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1551                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1552     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1553   }
1554   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1555 }
1556 
1557 llvm::FunctionCallee
1558 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1559                                              bool IsGPUDistribute) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name;
1563   if (IsGPUDistribute)
1564     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1565                                     : "__kmpc_distribute_static_init_4u")
1566                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1567                                     : "__kmpc_distribute_static_init_8u");
1568   else
1569     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1570                                     : "__kmpc_for_static_init_4u")
1571                         : (IVSigned ? "__kmpc_for_static_init_8"
1572                                     : "__kmpc_for_static_init_8u");
1573 
1574   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1575   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1576   llvm::Type *TypeParams[] = {
1577     getIdentTyPointerTy(),                     // loc
1578     CGM.Int32Ty,                               // tid
1579     CGM.Int32Ty,                               // schedtype
1580     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1581     PtrTy,                                     // p_lower
1582     PtrTy,                                     // p_upper
1583     PtrTy,                                     // p_stride
1584     ITy,                                       // incr
1585     ITy                                        // chunk
1586   };
1587   auto *FnTy =
1588       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1589   return CGM.CreateRuntimeFunction(FnTy, Name);
1590 }
1591 
1592 llvm::FunctionCallee
1593 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1594   assert((IVSize == 32 || IVSize == 64) &&
1595          "IV size is not compatible with the omp runtime");
1596   StringRef Name =
1597       IVSize == 32
1598           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1599           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1600   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1601   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1602                                CGM.Int32Ty,           // tid
1603                                CGM.Int32Ty,           // schedtype
1604                                ITy,                   // lower
1605                                ITy,                   // upper
1606                                ITy,                   // stride
1607                                ITy                    // chunk
1608   };
1609   auto *FnTy =
1610       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1611   return CGM.CreateRuntimeFunction(FnTy, Name);
1612 }
1613 
1614 llvm::FunctionCallee
1615 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1616   assert((IVSize == 32 || IVSize == 64) &&
1617          "IV size is not compatible with the omp runtime");
1618   StringRef Name =
1619       IVSize == 32
1620           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1621           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1622   llvm::Type *TypeParams[] = {
1623       getIdentTyPointerTy(), // loc
1624       CGM.Int32Ty,           // tid
1625   };
1626   auto *FnTy =
1627       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1628   return CGM.CreateRuntimeFunction(FnTy, Name);
1629 }
1630 
1631 llvm::FunctionCallee
1632 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1633   assert((IVSize == 32 || IVSize == 64) &&
1634          "IV size is not compatible with the omp runtime");
1635   StringRef Name =
1636       IVSize == 32
1637           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1638           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1639   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1640   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1641   llvm::Type *TypeParams[] = {
1642     getIdentTyPointerTy(),                     // loc
1643     CGM.Int32Ty,                               // tid
1644     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1645     PtrTy,                                     // p_lower
1646     PtrTy,                                     // p_upper
1647     PtrTy                                      // p_stride
1648   };
1649   auto *FnTy =
1650       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1651   return CGM.CreateRuntimeFunction(FnTy, Name);
1652 }
1653 
1654 /// Obtain information that uniquely identifies a target entry. This
1655 /// consists of the file and device IDs as well as line number associated with
1656 /// the relevant entry source location.
1657 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1658                                      unsigned &DeviceID, unsigned &FileID,
1659                                      unsigned &LineNum) {
1660   SourceManager &SM = C.getSourceManager();
1661 
1662   // The loc should be always valid and have a file ID (the user cannot use
1663   // #pragma directives in macros)
1664 
1665   assert(Loc.isValid() && "Source location is expected to be always valid.");
1666 
1667   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1668   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1669 
1670   llvm::sys::fs::UniqueID ID;
1671   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1672     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1673     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1675       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1676           << PLoc.getFilename() << EC.message();
1677   }
1678 
1679   DeviceID = ID.getDevice();
1680   FileID = ID.getFile();
1681   LineNum = PLoc.getLine();
1682 }
1683 
/// Returns the address of the "_decl_tgt_ref_ptr" indirection pointer for a
/// declare-target variable, creating it on first use; returns an invalid
/// Address when the variable needs no indirection.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // No indirection pointers are needed in simd-only mode.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'link' variables, or 'to' variables under unified shared memory, are
  // accessed through the reference pointer.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Pointer name: "<mangled>[_<fileid>]_decl_tgt_ref_ptr"; the file id is
    // appended to disambiguate internal-linkage variables across TUs.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the pointer variable lazily.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the variable's address;
      // on the device it is left for the runtime/registration to fill in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1722 
1723 llvm::Constant *
1724 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1725   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1726          !CGM.getContext().getTargetInfo().isTLSSupported());
1727   // Lookup the entry, lazily creating it if necessary.
1728   std::string Suffix = getName({"cache", ""});
1729   return getOrCreateInternalVariable(
1730       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1731 }
1732 
1733 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1734                                                 const VarDecl *VD,
1735                                                 Address VDAddr,
1736                                                 SourceLocation Loc) {
1737   if (CGM.getLangOpts().OpenMPUseTLS &&
1738       CGM.getContext().getTargetInfo().isTLSSupported())
1739     return VDAddr;
1740 
1741   llvm::Type *VarTy = VDAddr.getElementType();
1742   llvm::Value *Args[] = {
1743       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1744       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1745       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1746       getOrCreateThreadPrivateCache(VD)};
1747   return Address::deprecated(
1748       CGF.EmitRuntimeCall(
1749           OMPBuilder.getOrCreateRuntimeFunction(
1750               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1751           Args),
1752       VDAddr.getAlignment());
1753 }
1754 
1755 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1756     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1757     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1758   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1759   // library.
1760   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1761   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1762                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1763                       OMPLoc);
1764   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1765   // to register constructor/destructor for variable.
1766   llvm::Value *Args[] = {
1767       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1768       Ctor, CopyCtor, Dtor};
1769   CGF.EmitRuntimeCall(
1770       OMPBuilder.getOrCreateRuntimeFunction(
1771           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1772       Args);
1773 }
1774 
/// Emits the ctor/dtor machinery for a threadprivate variable when native TLS
/// cannot be used: a "__kmpc_global_ctor_" function re-emitting the
/// initializer into the per-thread copy, a "__kmpc_global_dtor_" function
/// destroying it, and the __kmpc_threadprivate_register call. When no CGF is
/// supplied, returns a "__omp_threadprivate_init_" function that performs the
/// registration; otherwise registers inline and returns nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS the backend handles the variable; no registration needed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the machinery only once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor receives the address of the per-thread copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer on the per-thread storage.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor's return value is the destination pointer (its FI return
      // type is VoidPtrTy), so reload the argument and store it back.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor likewise receives the per-thread copy's address as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No initialization required: pass a null ctor of the expected type.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No destruction required: pass a null dtor of the expected type.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function context: wrap the registration in a dedicated global
      // initialization function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1894 
/// Emits offload ctor/dtor entries for a declare-target variable: on the
/// device, actual "<prefix>_ctor"/"<prefix>_dtor" functions; on the host,
/// placeholder globals whose addresses act as the entry IDs. Returns
/// OpenMPIsDevice (presumably telling the caller whether the regular
/// definition emission should be skipped on the device — confirm with caller).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without offloading targets when not compiling device code.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference-pointer mechanism instead; no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per variable.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Use an artificial debug location for the ctor body.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(
          Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)),
          Init->getType().getQualifiers(),
          /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add to llvm.used so the ctor survives even with no direct uses.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder global is needed; its address acts as
      // the entry's unique ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(
          Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add to llvm.used so the dtor survives even with no direct uses.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder; its address is the entry's unique ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2010 
/// Returns the address of an "artificial" threadprivate variable identified
/// by Name (internal runtime state rather than a user VarDecl). With native
/// TLS the backing global is marked thread_local and returned directly;
/// otherwise the per-thread copy is obtained via __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // Backing global named "<Name><artificial-suffix>".
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime path: __kmpc_threadprivate_cached(loc, tid, &var, size, &cache),
  // with a dedicated "<Name><artificial-suffix><cache-suffix>" cache global.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the returned i8* to a pointer to the variable's memory type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
2042 
/// Emits code guarded by an OpenMP 'if' clause condition: ThenGen when Cond
/// evaluates to true, ElseGen otherwise. When Cond constant-folds, only the
/// live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2081 
/// Emit code for an OpenMP 'parallel' construct: either a __kmpc_fork_call
/// that runs \p OutlinedFn in parallel, or — when \p IfCond evaluates to
/// false at runtime — a serialized region bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
/// NOTE(review): \p NumThreads is not referenced in this implementation.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined function to the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function on the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause, pick the arm at runtime (or fold it); otherwise the
  // parallel path is unconditional.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2153 
2154 // If we're inside an (outlined) parallel region, use the region info's
2155 // thread-ID variable (it is passed in a first argument of the outlined function
2156 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2157 // regular serial code region, get thread ID by calling kmp_int32
2158 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2159 // return the address of that temp.
2160 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2161                                              SourceLocation Loc) {
2162   if (auto *OMPRegionInfo =
2163           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2164     if (OMPRegionInfo->getThreadIDVariable())
2165       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2166 
2167   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2168   QualType Int32Ty =
2169       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2170   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2171   CGF.EmitStoreOfScalar(ThreadID,
2172                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2173 
2174   return ThreadIDTemp;
2175 }
2176 
2177 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2178     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2179   SmallString<256> Buffer;
2180   llvm::raw_svector_ostream Out(Buffer);
2181   Out << Name;
2182   StringRef RuntimeName = Out.str();
2183   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2184   if (Elem.second) {
2185     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2186            "OMP internal variable has different type than requested");
2187     return &*Elem.second;
2188   }
2189 
2190   return Elem.second = new llvm::GlobalVariable(
2191              CGM.getModule(), Ty, /*IsConstant*/ false,
2192              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2193              Elem.first(), /*InsertBefore=*/nullptr,
2194              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2195 }
2196 
2197 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2198   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2199   std::string Name = getName({Prefix, "var"});
2200   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2201 }
2202 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to \p EnterCallee before the region body and Exit()
/// emits a call to \p ExitCallee after it.  When \p Conditional is true, the
/// enter call's result guards the body: a branch is emitted so the body only
/// runs on the non-zero path, and the caller must invoke Done() afterwards to
/// close the conditional with the continuation block.
class CommonActionTy final : public PrePostActionTy {
  /// Runtime function invoked on region entry (e.g. __kmpc_master).
  llvm::FunctionCallee EnterCallee;
  /// Arguments for the entry call.
  ArrayRef<llvm::Value *> EnterArgs;
  /// Runtime function invoked on region exit (e.g. __kmpc_end_master).
  llvm::FunctionCallee ExitCallee;
  /// Arguments for the exit call.
  ArrayRef<llvm::Value *> ExitArgs;
  /// True if the entry call's return value guards emission of the body.
  bool Conditional;
  /// Continuation block for the conditional form; created in Enter().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only emit the region body when the runtime call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // NOTE(review): valid only after a conditional Enter() — ContBlock is null
  // otherwise.  Callers (master/masked/single) call Done() only in that mode.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2241 
2242 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2243                                          StringRef CriticalName,
2244                                          const RegionCodeGenTy &CriticalOpGen,
2245                                          SourceLocation Loc, const Expr *Hint) {
2246   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2247   // CriticalOpGen();
2248   // __kmpc_end_critical(ident_t *, gtid, Lock);
2249   // Prepare arguments and build a call to __kmpc_critical
2250   if (!CGF.HaveInsertPoint())
2251     return;
2252   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2253                          getCriticalRegionLock(CriticalName)};
2254   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2255                                                 std::end(Args));
2256   if (Hint) {
2257     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2258         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2259   }
2260   CommonActionTy Action(
2261       OMPBuilder.getOrCreateRuntimeFunction(
2262           CGM.getModule(),
2263           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2264       EnterArgs,
2265       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2266                                             OMPRTL___kmpc_end_critical),
2267       Args);
2268   CriticalOpGen.setAction(Action);
2269   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2270 }
2271 
2272 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2273                                        const RegionCodeGenTy &MasterOpGen,
2274                                        SourceLocation Loc) {
2275   if (!CGF.HaveInsertPoint())
2276     return;
2277   // if(__kmpc_master(ident_t *, gtid)) {
2278   //   MasterOpGen();
2279   //   __kmpc_end_master(ident_t *, gtid);
2280   // }
2281   // Prepare arguments and build a call to __kmpc_master
2282   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2283   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2284                             CGM.getModule(), OMPRTL___kmpc_master),
2285                         Args,
2286                         OMPBuilder.getOrCreateRuntimeFunction(
2287                             CGM.getModule(), OMPRTL___kmpc_end_master),
2288                         Args,
2289                         /*Conditional=*/true);
2290   MasterOpGen.setAction(Action);
2291   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2292   Action.Done(CGF);
2293 }
2294 
2295 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2296                                        const RegionCodeGenTy &MaskedOpGen,
2297                                        SourceLocation Loc, const Expr *Filter) {
2298   if (!CGF.HaveInsertPoint())
2299     return;
2300   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2301   //   MaskedOpGen();
2302   //   __kmpc_end_masked(iden_t *, gtid);
2303   // }
2304   // Prepare arguments and build a call to __kmpc_masked
2305   llvm::Value *FilterVal = Filter
2306                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2307                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2308   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2309                          FilterVal};
2310   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2311                             getThreadID(CGF, Loc)};
2312   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2313                             CGM.getModule(), OMPRTL___kmpc_masked),
2314                         Args,
2315                         OMPBuilder.getOrCreateRuntimeFunction(
2316                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2317                         ArgsEnd,
2318                         /*Conditional=*/true);
2319   MaskedOpGen.setAction(Action);
2320   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2321   Action.Done(CGF);
2322 }
2323 
2324 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2325                                         SourceLocation Loc) {
2326   if (!CGF.HaveInsertPoint())
2327     return;
2328   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2329     OMPBuilder.createTaskyield(CGF.Builder);
2330   } else {
2331     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2332     llvm::Value *Args[] = {
2333         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2334         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2335     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2336                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2337                         Args);
2338   }
2339 
2340   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2341     Region->emitUntiedSwitch(CGF);
2342 }
2343 
2344 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2345                                           const RegionCodeGenTy &TaskgroupOpGen,
2346                                           SourceLocation Loc) {
2347   if (!CGF.HaveInsertPoint())
2348     return;
2349   // __kmpc_taskgroup(ident_t *, gtid);
2350   // TaskgroupOpGen();
2351   // __kmpc_end_taskgroup(ident_t *, gtid);
2352   // Prepare arguments and build a call to __kmpc_taskgroup
2353   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2354   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2355                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2356                         Args,
2357                         OMPBuilder.getOrCreateRuntimeFunction(
2358                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2359                         Args);
2360   TaskgroupOpGen.setAction(Action);
2361   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2362 }
2363 
2364 /// Given an array of pointers to variables, project the address of a
2365 /// given variable.
2366 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2367                                       unsigned Index, const VarDecl *Var) {
2368   // Pull out the pointer to the variable.
2369   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2370   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2371 
2372   Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var));
2373   Addr = CGF.Builder.CreateElementBitCast(
2374       Addr, CGF.ConvertTypeForMem(Var->getType()));
2375   return Addr;
2376 }
2377 
/// Emit the internal copy function handed to __kmpc_copyprivate.
///
/// The generated function has the shape
///   void omp.copyprivate.copy_func(void *LHSArg, void *RHSArg);
/// where both arguments point at arrays of void* (one slot per copyprivate
/// variable).  For each variable it performs the copy described by the
/// corresponding expression in \p AssignmentOps from the RHS (source) slot to
/// the LHS (destination) slot.  Returns the emitted llvm::Function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // The function is module-internal; its name is only for readability.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the body with a fresh CodeGenFunction so we don't disturb the
  // caller's insertion state.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
      CGF.getPointerAlign());
  Address RHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
      CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2433 
/// Emit a 'single' region and, when copyprivate variables are present,
/// broadcast their values from the executing thread to the rest of the team
/// via __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one src/dst/assignment-op per
  // copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it flags which thread executed the region; only needed when there is
  // data to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Still on the guarded path here: only the executing thread sets the flag.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional opened by __kmpc_single.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2521 
2522 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2523                                         const RegionCodeGenTy &OrderedOpGen,
2524                                         SourceLocation Loc, bool IsThreads) {
2525   if (!CGF.HaveInsertPoint())
2526     return;
2527   // __kmpc_ordered(ident_t *, gtid);
2528   // OrderedOpGen();
2529   // __kmpc_end_ordered(ident_t *, gtid);
2530   // Prepare arguments and build a call to __kmpc_ordered
2531   if (IsThreads) {
2532     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2533     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2534                               CGM.getModule(), OMPRTL___kmpc_ordered),
2535                           Args,
2536                           OMPBuilder.getOrCreateRuntimeFunction(
2537                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2538                           Args);
2539     OrderedOpGen.setAction(Action);
2540     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2541     return;
2542   }
2543   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2544 }
2545 
2546 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2547   unsigned Flags;
2548   if (Kind == OMPD_for)
2549     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2550   else if (Kind == OMPD_sections)
2551     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2552   else if (Kind == OMPD_single)
2553     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2554   else if (Kind == OMPD_barrier)
2555     Flags = OMP_IDENT_BARRIER_EXPL;
2556   else
2557     Flags = OMP_IDENT_BARRIER_IMPL;
2558   return Flags;
2559 }
2560 
2561 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2562     CodeGenFunction &CGF, const OMPLoopDirective &S,
2563     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2564   // Check if the loop directive is actually a doacross loop directive. In this
2565   // case choose static, 1 schedule.
2566   if (llvm::any_of(
2567           S.getClausesOfKind<OMPOrderedClause>(),
2568           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2569     ScheduleKind = OMPC_SCHEDULE_static;
2570     // Chunk size is 1 in this case.
2571     llvm::APInt ChunkSize(32, 1);
2572     ChunkExpr = IntegerLiteral::Create(
2573         CGF.getContext(), ChunkSize,
2574         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2575         SourceLocation());
2576   }
2577 }
2578 
/// Emit a barrier for construct \p Kind.
///
/// Delegates to the OpenMPIRBuilder when enabled.  Inside a cancellable
/// region (and unless \p ForceSimpleCall), __kmpc_cancel_barrier is emitted
/// instead of __kmpc_barrier; when \p EmitChecks is set its result is tested
/// and a non-zero value branches to the construct's cancellation exit.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2628 
2629 /// Map the OpenMP loop schedule to the runtime enumeration.
2630 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2631                                           bool Chunked, bool Ordered) {
2632   switch (ScheduleKind) {
2633   case OMPC_SCHEDULE_static:
2634     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2635                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2636   case OMPC_SCHEDULE_dynamic:
2637     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2638   case OMPC_SCHEDULE_guided:
2639     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2640   case OMPC_SCHEDULE_runtime:
2641     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2642   case OMPC_SCHEDULE_auto:
2643     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2644   case OMPC_SCHEDULE_unknown:
2645     assert(!Chunked && "chunk was specified but schedule kind not known");
2646     return Ordered ? OMP_ord_static : OMP_sch_static;
2647   }
2648   llvm_unreachable("Unexpected runtime schedule");
2649 }
2650 
2651 /// Map the OpenMP distribute schedule to the runtime enumeration.
2652 static OpenMPSchedType
2653 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2654   // only static is allowed for dist_schedule
2655   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2656 }
2657 
2658 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2659                                          bool Chunked) const {
2660   OpenMPSchedType Schedule =
2661       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2662   return Schedule == OMP_sch_static;
2663 }
2664 
2665 bool CGOpenMPRuntime::isStaticNonchunked(
2666     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2667   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2668   return Schedule == OMP_dist_sch_static;
2669 }
2670 
2671 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2672                                       bool Chunked) const {
2673   OpenMPSchedType Schedule =
2674       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2675   return Schedule == OMP_sch_static_chunked;
2676 }
2677 
2678 bool CGOpenMPRuntime::isStaticChunked(
2679     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2680   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2681   return Schedule == OMP_dist_sch_static_chunked;
2682 }
2683 
2684 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2685   OpenMPSchedType Schedule =
2686       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2687   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2688   return Schedule != OMP_sch_static;
2689 }
2690 
2691 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2692                                   OpenMPScheduleClauseModifier M1,
2693                                   OpenMPScheduleClauseModifier M2) {
2694   int Modifier = 0;
2695   switch (M1) {
2696   case OMPC_SCHEDULE_MODIFIER_monotonic:
2697     Modifier = OMP_sch_modifier_monotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700     Modifier = OMP_sch_modifier_nonmonotonic;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_simd:
2703     if (Schedule == OMP_sch_static_chunked)
2704       Schedule = OMP_sch_static_balanced_chunked;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_last:
2707   case OMPC_SCHEDULE_MODIFIER_unknown:
2708     break;
2709   }
2710   switch (M2) {
2711   case OMPC_SCHEDULE_MODIFIER_monotonic:
2712     Modifier = OMP_sch_modifier_monotonic;
2713     break;
2714   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2715     Modifier = OMP_sch_modifier_nonmonotonic;
2716     break;
2717   case OMPC_SCHEDULE_MODIFIER_simd:
2718     if (Schedule == OMP_sch_static_chunked)
2719       Schedule = OMP_sch_static_balanced_chunked;
2720     break;
2721   case OMPC_SCHEDULE_MODIFIER_last:
2722   case OMPC_SCHEDULE_MODIFIER_unknown:
2723     break;
2724   }
2725   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2726   // If the static schedule kind is specified or if the ordered clause is
2727   // specified, and if the nonmonotonic modifier is not specified, the effect is
2728   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2729   // modifier is specified, the effect is as if the nonmonotonic modifier is
2730   // specified.
2731   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2732     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2733           Schedule == OMP_sch_static_balanced_chunked ||
2734           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2735           Schedule == OMP_dist_sch_static_chunked ||
2736           Schedule == OMP_dist_sch_static))
2737       Modifier = OMP_sch_modifier_nonmonotonic;
2738   }
2739   return Schedule | Modifier;
2740 }
2741 
// Emit the runtime call that initializes a dynamically scheduled worksharing
// loop. The concrete entry point (__kmpc_dispatch_init_4|8[u]) is selected by
// createDispatchInitFunction from IVSize/IVSigned.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules must go through emitForStaticInit instead, unless the
  // loop is 'ordered' (ordered loops always use the dispatch interface).
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2774 
// Shared helper that emits the __kmpc_for_static_init-style runtime call for
// both worksharing (for/sections) and distribute constructs. The caller has
// already mapped the clause onto a static OpenMPSchedType and selected the
// proper runtime function.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered static loops go through the dispatch interface, never here.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A chunk is only absent for the non-chunked schedule variants.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2823 
// Emit the static-schedule initialization call for a loop- or sections-based
// worksharing directive; delegates the actual call to emitForStaticInitCall.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the ident_t with the work kind so the runtime/tools can tell loop
  // regions apart from sections regions.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2844 
2845 void CGOpenMPRuntime::emitDistributeStaticInit(
2846     CodeGenFunction &CGF, SourceLocation Loc,
2847     OpenMPDistScheduleClauseKind SchedKind,
2848     const CGOpenMPRuntime::StaticRTInput &Values) {
2849   OpenMPSchedType ScheduleNum =
2850       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2851   llvm::Value *UpdatedLocation =
2852       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2853   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2854   llvm::FunctionCallee StaticInitFunction;
2855   bool isGPUDistribute =
2856       CGM.getLangOpts().OpenMPIsDevice &&
2857       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2858   StaticInitFunction = createForStaticInitFunction(
2859       Values.IVSize, Values.IVSigned, isGPUDistribute);
2860 
2861   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2862                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2863                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2864 }
2865 
2866 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2867                                           SourceLocation Loc,
2868                                           OpenMPDirectiveKind DKind) {
2869   if (!CGF.HaveInsertPoint())
2870     return;
2871   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2872   llvm::Value *Args[] = {
2873       emitUpdateLocation(CGF, Loc,
2874                          isOpenMPDistributeDirective(DKind)
2875                              ? OMP_IDENT_WORK_DISTRIBUTE
2876                              : isOpenMPLoopDirective(DKind)
2877                                    ? OMP_IDENT_WORK_LOOP
2878                                    : OMP_IDENT_WORK_SECTIONS),
2879       getThreadID(CGF, Loc)};
2880   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2881   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2882       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2883     CGF.EmitRuntimeCall(
2884         OMPBuilder.getOrCreateRuntimeFunction(
2885             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2886         Args);
2887   else
2888     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2889                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2890                         Args);
2891 }
2892 
2893 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2894                                                  SourceLocation Loc,
2895                                                  unsigned IVSize,
2896                                                  bool IVSigned) {
2897   if (!CGF.HaveInsertPoint())
2898     return;
2899   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2900   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2901   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2902 }
2903 
// Emit the __kmpc_dispatch_next call that fetches the next chunk of a
// dynamically scheduled loop, writing the bounds/stride/last-iteration flag
// through the given addresses. Returns the runtime result converted to an i1
// "more work available" value.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a kmp_int32; narrow it to a boolean for the caller.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2927 
2928 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2929                                            llvm::Value *NumThreads,
2930                                            SourceLocation Loc) {
2931   if (!CGF.HaveInsertPoint())
2932     return;
2933   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2934   llvm::Value *Args[] = {
2935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2937   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2938                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2939                       Args);
2940 }
2941 
// Emit the runtime call that records the 'proc_bind' clause policy for the
// next parallel region.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  // The enum value is passed as a signed int constant.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
2956 
// Emit an OpenMP 'flush'. The flushed-variable list and the atomic ordering
// are currently unused by this lowering: the runtime call flushes everything.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate to the OpenMPIRBuilder when it is enabled.
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}
2970 
namespace {
/// Indexes of fields for type kmp_task_t. The order must match the field
/// order used when the kmp_task_t record is built for codegen.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2996 
2997 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2998   return OffloadEntriesTargetRegion.empty() &&
2999          OffloadEntriesDeviceGlobalVar.empty();
3000 }
3001 
/// Initialize target region entry.
/// Creates a placeholder entry (null address/ID) at the given position in the
/// device/file/parent/line map, preserving the host-side ordering via Order.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3015 
// Register the address/ID of a target region entry. On the device, entries
// were pre-initialized from the host IR metadata and are only filled in; on
// the host, a new entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Ignore duplicate registrations of plain target regions whose entry
    // already has an address/ID (checked with IgnoreAddressId).
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3045 
3046 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3047     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3048     bool IgnoreAddressId) const {
3049   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3050   if (PerDevice == OffloadEntriesTargetRegion.end())
3051     return false;
3052   auto PerFile = PerDevice->second.find(FileID);
3053   if (PerFile == PerDevice->second.end())
3054     return false;
3055   auto PerParentName = PerFile->second.find(ParentName);
3056   if (PerParentName == PerFile->second.end())
3057     return false;
3058   auto PerLine = PerParentName->second.find(LineNum);
3059   if (PerLine == PerParentName->second.end())
3060     return false;
3061   // Fail if this entry is already registered.
3062   if (!IgnoreAddressId &&
3063       (PerLine->second.getAddress() || PerLine->second.getID()))
3064     return false;
3065   return true;
3066 }
3067 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  // Invocation order is device -> file -> parent function -> line.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          // P.first() yields the parent-function name key.
          Action(D.first, F.first, P.first(), L.first, L.second);
}
3077 
// Initialize a device global variable entry from host IR metadata: creates a
// placeholder (no address/size yet) keyed by mangled name, preserving the
// host-side ordering via Order.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3088 
3089 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3090     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3091                                      CharUnits VarSize,
3092                                      OMPTargetGlobalVarEntryKind Flags,
3093                                      llvm::GlobalValue::LinkageTypes Linkage) {
3094   if (CGM.getLangOpts().OpenMPIsDevice) {
3095     // This could happen if the device compilation is invoked standalone.
3096     if (!hasDeviceGlobalVarEntryInfo(VarName))
3097       return;
3098     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3099     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3100       if (Entry.getVarSize().isZero()) {
3101         Entry.setVarSize(VarSize);
3102         Entry.setLinkage(Linkage);
3103       }
3104       return;
3105     }
3106     Entry.setVarSize(VarSize);
3107     Entry.setLinkage(Linkage);
3108     Entry.setAddress(Addr);
3109   } else {
3110     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3111       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3112       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3113              "Entry not initialized!");
3114       if (Entry.getVarSize().isZero()) {
3115         Entry.setVarSize(VarSize);
3116         Entry.setLinkage(Linkage);
3117       }
3118       return;
3119     }
3120     OffloadEntriesDeviceGlobalVar.try_emplace(
3121         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3122     ++OffloadingEntriesNum;
3123   }
3124 }
3125 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}
3133 
// Materialize one __tgt_offload_entry global for the given offload target
// (target region or declare-target variable) and place it in the
// 'omp_offloading_entries' section so the linker collects the entry table.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Fields of __tgt_offload_entry: addr, name, size, flags, reserved.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3164 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected into Order-indexed slots so the emitted entry table
  // matches the registration order on both host and device.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // device/file unique IDs against the source manager's file table.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit one __tgt_offload_entry per collected entry, diagnosing entries that
  // were registered without a valid address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3338 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR in a throwaway context; only the named metadata is read.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode integer/string operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout must match
    // the emitters in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3407 
3408 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3409   if (!KmpRoutineEntryPtrTy) {
3410     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3411     ASTContext &C = CGM.getContext();
3412     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3413     FunctionProtoType::ExtProtoInfo EPI;
3414     KmpRoutineEntryPtrQTy = C.getPointerType(
3415         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3416     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3417   }
3418 }
3419 
3420 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3421   // Make sure the type of the entry is already created. This is the type we
3422   // have to create:
3423   // struct __tgt_offload_entry{
3424   //   void      *addr;       // Pointer to the offload entry info.
3425   //                          // (function or global)
3426   //   char      *name;       // Name of the function or global.
3427   //   size_t     size;       // Size of the entry info (0 if it a function).
3428   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3429   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3430   // };
3431   if (TgtOffloadEntryQTy.isNull()) {
3432     ASTContext &C = CGM.getContext();
3433     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3434     RD->startDefinition();
3435     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3436     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3437     addFieldToRecordDecl(C, RD, C.getSizeType());
3438     addFieldToRecordDecl(
3439         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3440     addFieldToRecordDecl(
3441         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3442     RD->completeDefinition();
3443     RD->addAttr(PackedAttr::CreateImplicit(C));
3444     TgtOffloadEntryQTy = C.getRecordType(RD);
3445   }
3446   return TgtOffloadEntryQTy;
3447 }
3448 
3449 namespace {
3450 struct PrivateHelpersTy {
3451   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3452                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3453       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3454         PrivateElemInit(PrivateElemInit) {}
3455   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3456   const Expr *OriginalRef = nullptr;
3457   const VarDecl *Original = nullptr;
3458   const VarDecl *PrivateCopy = nullptr;
3459   const VarDecl *PrivateElemInit = nullptr;
3460   bool isLocalPrivate() const {
3461     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3462   }
3463 };
3464 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3465 } // anonymous namespace
3466 
3467 static bool isAllocatableDecl(const VarDecl *VD) {
3468   const VarDecl *CVD = VD->getCanonicalDecl();
3469   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3470     return false;
3471   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3472   // Use the default allocation.
3473   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3474            !AA->getAllocator());
3475 }
3476 
3477 static RecordDecl *
3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3479   if (!Privates.empty()) {
3480     ASTContext &C = CGM.getContext();
3481     // Build struct .kmp_privates_t. {
3482     //         /*  private vars  */
3483     //       };
3484     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3485     RD->startDefinition();
3486     for (const auto &Pair : Privates) {
3487       const VarDecl *VD = Pair.second.Original;
3488       QualType Type = VD->getType().getNonReferenceType();
3489       // If the private variable is a local variable with lvalue ref type,
3490       // allocate the pointer instead of the pointee type.
3491       if (Pair.second.isLocalPrivate()) {
3492         if (VD->getType()->isLValueReferenceType())
3493           Type = C.getPointerType(Type);
3494         if (isAllocatableDecl(VD))
3495           Type = C.getPointerType(Type);
3496       }
3497       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3498       if (VD->hasAttrs()) {
3499         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3500              E(VD->getAttrs().end());
3501              I != E; ++I)
3502           FD->addAttr(*I);
3503       }
3504     }
3505     RD->completeDefinition();
3506     return RD;
3507   }
3508   return nullptr;
3509 }
3510 
3511 static RecordDecl *
3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3513                          QualType KmpInt32Ty,
3514                          QualType KmpRoutineEntryPointerQTy) {
3515   ASTContext &C = CGM.getContext();
3516   // Build struct kmp_task_t {
3517   //         void *              shareds;
3518   //         kmp_routine_entry_t routine;
3519   //         kmp_int32           part_id;
3520   //         kmp_cmplrdata_t data1;
3521   //         kmp_cmplrdata_t data2;
3522   // For taskloops additional fields:
3523   //         kmp_uint64          lb;
3524   //         kmp_uint64          ub;
3525   //         kmp_int64           st;
3526   //         kmp_int32           liter;
3527   //         void *              reductions;
3528   //       };
3529   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3530   UD->startDefinition();
3531   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3532   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3533   UD->completeDefinition();
3534   QualType KmpCmplrdataTy = C.getRecordType(UD);
3535   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3536   RD->startDefinition();
3537   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3538   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3539   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3540   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3541   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3542   if (isOpenMPTaskLoopDirective(Kind)) {
3543     QualType KmpUInt64Ty =
3544         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3545     QualType KmpInt64Ty =
3546         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3547     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3548     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3549     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3550     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3551     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3552   }
3553   RD->completeDefinition();
3554   return RD;
3555 }
3556 
3557 static RecordDecl *
3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3559                                      ArrayRef<PrivateDataTy> Privates) {
3560   ASTContext &C = CGM.getContext();
3561   // Build struct kmp_task_t_with_privates {
3562   //         kmp_task_t task_data;
3563   //         .kmp_privates_t. privates;
3564   //       };
3565   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3566   RD->startDefinition();
3567   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3568   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3569     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3570   RD->completeDefinition();
3571   return RD;
3572 }
3573 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature of the generated entry:
  // kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // The entry is internal to this TU; the runtime only sees its address.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base is the kmp_task_t part (first field of kmp_task_t_with_privates).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is forwarded by address, not by value.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) only exists when the task has privates;
  // pass a null pointer otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally forward lb, ub, st, liter and the
  // reductions pointer stored in kmp_task_t.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3688 
/// Emit a task destructor function with the same signature as the task entry
/// (kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt)). It registers
/// destruction cleanups for every field of the privates record (the second
/// field of *tt) whose type has a non-trivial destruction kind.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  // Internal linkage: the runtime only sees the function address.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Push a cleanup for every private that requires destruction; the
    // cleanups run when the generated function finishes.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3737 
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the privates record itself.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized declaration to the index of its out-pointer argument
  // so field order (Privates) can be matched to the argument list below.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Private locals mirror the field types chosen in createPrivatesRecordDecl:
  // lvalue references and allocator-managed locals are stored as pointers.
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // When optimizing, force this small helper to be inlined into its callers.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  // Walk the fields of the privates record and store each field's address
  // through the corresponding out-pointer argument.
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
3846 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the source task's shareds area; may be
///        Address::invalid() when no copy from shareds is required.
/// \param TDBase LValue of the kmp_task_t_with_privates instance being
///        initialized.
/// \param ForDup true when emitting the body of the task_dup helper
///        (taskloop), false for initial task construction.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor-based initializers
    // need to be re-run; everything else was handled at task creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value through the shareds of the source task,
          // re-aligned to the original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar firstprivate: privatize the element decl to refer to the
          // shared value, then run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the private copy's initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3968 
3969 /// Check if duplication function is required for taskloops.
3970 static bool checkInitIsRequired(CodeGenFunction &CGF,
3971                                 ArrayRef<PrivateDataTy> Privates) {
3972   bool InitRequired = false;
3973   for (const PrivateDataTy &Pair : Privates) {
3974     if (Pair.second.isLocalPrivate())
3975       continue;
3976     const VarDecl *VD = Pair.second.PrivateCopy;
3977     const Expr *Init = VD->getAnyInitializer();
3978     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3979                                     !CGF.isTrivialInitializer(Init));
3980     if (InitRequired)
3981       break;
3982   }
3983   return InitRequired;
3984 }
3985 
3986 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void (kmp_task_t_with_privates *task_dst,
  //                  kmp_task_t_with_privates *task_src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied out of the source task's shareds area, so load
  // its shareds pointer only in that case.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address::deprecated(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4065 
4066 /// Checks if destructor function is required to be generated.
4067 /// \return true if cleanups are required, false otherwise.
4068 static bool
4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4070                          ArrayRef<PrivateDataTy> Privates) {
4071   for (const PrivateDataTy &P : Privates) {
4072     if (P.second.isLocalPrivate())
4073       continue;
4074     QualType Ty = P.second.Original->getType().getNonReferenceType();
4075     if (Ty.isDestructedType())
4076       return true;
4077   }
4078   return false;
4079 }
4080 
namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Continuation ("cont") and exit jump destinations, one per iterator
  // dimension; filled by the constructor and consumed in reverse order by
  // the destructor to close the loop nest.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens one loop per iterator of \p E (a null \p E is a no-op scope):
  /// privatizes the iterator and counter variables, zero-initializes each
  /// counter, and emits the "cont"/"body" blocks with the counter < upper
  /// check. Code generated while this scope is alive becomes the innermost
  /// loop body; the matching latches and exits are emitted by the destructor.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound up front, before any counters are touched.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter variable's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost first: emits the
  /// counter increment, the back-branch to "cont", and the "exit" block.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4159 
/// Returns the address of the data referenced by expression \p E together
/// with its size in bytes. Handles OpenMP array shaping expressions and
/// array sections in addition to plain lvalues.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // For a shaping expression ([d0][d1]...)base the address is simply the
    // value of the base pointer.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(pointee) * d0 * d1 * ... with the dimensions converted
    // to size_t; the multiplications are no-unsigned-wrap.
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // For an array section the size is computed as the byte distance
    // (&upper-bound-element + 1) - &lower-bound-element.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    // Plain lvalue: the size is just sizeof(type).
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
4195 
4196 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4197 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4198   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4199   if (KmpTaskAffinityInfoTy.isNull()) {
4200     RecordDecl *KmpAffinityInfoRD =
4201         C.buildImplicitRecord("kmp_task_affinity_info_t");
4202     KmpAffinityInfoRD->startDefinition();
4203     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4204     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4206     KmpAffinityInfoRD->completeDefinition();
4207     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4208   }
4209 }
4210 
/// Emits allocation and initialization of a kmp_task_t object for the
/// task-based directive \p D: gathers private/firstprivate/lastprivate data,
/// builds the task-specific kmp_task_t_with_privates record, allocates the
/// task via __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc when a
/// 'nowait' clause is present), handles 'detach' and 'affinity' clauses,
/// copies the shared data, initializes the private copies, and fills in the
/// destructor and priority fields. Returns the pieces the caller needs to
/// emit the actual task launch.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the element-init variable used when
  // copying the original value into the private copy.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Locals that are allocatable are stored as pointers, so they use pointer
  // alignment instead of the declared alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment (stably) to minimize padding in the
  // privates record.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use an
  // extended record, cached separately from the plain task record.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map function is passed as the fourth argument of the
  // outlined task function; take its expected type from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map pointer.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime value (pointer set) or a constant
  // (int flag); emit a select in the former case.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    // 'nowait' tasks go through the target-task allocation entry point,
    // which takes the extra device ID argument.
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned void* to the event-handler variable's type
    // before storing it.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime-computed count
    // (product of all iterator upper bounds); plain clauses contribute a
    // compile-time count of their list items.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized: emit a VLA of affinity entries.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized: emit a constant array of affinity entries.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-based entries are indexed at runtime; keep the running
    // position in a memory temporary seeded with the static count.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address::deprecated(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may need a task-duplication function for lastprivates or
    // privates that require (re-)initialization in duplicated tasks.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4598 
namespace {
/// Dependence kind for RTL. These values are written into the 'flags' field
/// of kmp_depend_info entries (see emitDependData) and therefore must match
/// the flag encoding expected by the OpenMP runtime.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record, in declaration order:
/// base address, length in bytes, dependence flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4610 
4611 /// Translates internal dependency kind into the runtime kind.
4612 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4613   RTLDependenceKindTy DepKind;
4614   switch (K) {
4615   case OMPC_DEPEND_in:
4616     DepKind = DepIn;
4617     break;
4618   // Out and InOut dependencies must use the same code.
4619   case OMPC_DEPEND_out:
4620   case OMPC_DEPEND_inout:
4621     DepKind = DepInOut;
4622     break;
4623   case OMPC_DEPEND_mutexinoutset:
4624     DepKind = DepMutexInOutSet;
4625     break;
4626   case OMPC_DEPEND_inoutset:
4627     DepKind = DepInOutSet;
4628     break;
4629   case OMPC_DEPEND_source:
4630   case OMPC_DEPEND_sink:
4631   case OMPC_DEPEND_depobj:
4632   case OMPC_DEPEND_unknown:
4633     llvm_unreachable("Unknown task dependence type");
4634   }
4635   return DepKind;
4636 }
4637 
4638 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4639 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4640                            QualType &FlagsTy) {
4641   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4642   if (KmpDependInfoTy.isNull()) {
4643     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4644     KmpDependInfoRD->startDefinition();
4645     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4646     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4647     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4648     KmpDependInfoRD->completeDefinition();
4649     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4650   }
4651 }
4652 
/// Returns the number of dependencies stored in a depobj together with an
/// lvalue for the first kmp_depend_info element of its dependency array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable stores a void* that points at the first
  // kmp_depend_info element of the array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element at index -1 is a header entry: its base_addr field holds
  // the number of dependencies in the array.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4679 
/// Fills \p DependenciesArray with one kmp_depend_info entry (base address,
/// length, flags) per dependence expression in \p Data, starting at position
/// \p Pos. \p Pos is either a compile-time index (unsigned*) advanced in
/// place, or an lvalue holding a runtime index (used when the dependencies
/// are generated inside an iterator loop).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If an iterator modifier is present, the stores below are emitted inside
  // the loop nest generated by this scope (one entry per iteration).
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time index: address the entry with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load the current position and GEP with it.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the compile-time counter, or emit a
    // runtime increment of the stored index.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4736 
/// Emits code that reads, for each depobj expression in \p Data, the number
/// of kmp_depend_info elements stored in that depobj, and returns one
/// llvm::Value per depobj expression with that (runtime) count.
/// The count is kept by the runtime in the base_addr field of the element at
/// index -1 of the depobj's dependency array (see emitDepobjDependClause,
/// which reserves that extra leading slot).
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Scope so that the iterator loops (if any) close before the loads of the
  // accumulated sizes below.
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the pointer stored in the depobj variable and cast it to
      // kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Element at index -1 holds the bookkeeping record.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count in a temporary (zero-initialized, then
      // incremented by NumDeps inside the iterator loops, if any).
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the final per-expression counts outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4792 
/// Emits a memcpy of the kmp_depend_info records stored in each depobj of
/// \p Data into \p DependenciesArray, starting at the runtime position held
/// in \p PosLVal and advancing that position by the number of copied
/// elements per depobj.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of one kmp_depend_info record, for computing the memcpy length.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the pointer stored in the depobj variable and cast it to
      // kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: Size = NumDeps * sizeof(kmp_depend_info).
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4849 
/// Emits the dependency array for the 'depend' clauses of a task-like
/// directive.
/// \returns a pair of (number of elements as an i32 value, address of the
/// kmp_depend_info array cast to void*), or (nullptr, invalid) when every
/// clause has an empty dependency list.
/// If any clause is a depobj or uses an iterator expression the total element
/// count is only known at runtime, so a VLA is emitted; otherwise a
/// fixed-size local array is used.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count of plain dependencies (depobj and iterator-based
  // clauses contribute 0 here; they are counted at runtime below).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Sum the runtime element counts stored in each depobj.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Each iterator multiplies the clause's dependency count by its upper
      // bound.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size known only at runtime: emit a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the VLA
    // size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Constant size: a plain local array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: plain dependencies (constant positions).
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  // Pass 2: iterator-based dependencies, tracked by a runtime counter that
  // starts where pass 1 stopped.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  // Pass 3: memcpy the contents of each depobj at the end of the array.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4973 
/// Emits the dependency array for an 'omp depobj' construct.
/// The array is heap-allocated via __kmpc_alloc with one extra leading
/// kmp_depend_info element whose base_addr field stores the number of
/// dependencies (read back by getDepobjElements / emitDepobjElementsSizes).
/// \returns the address of the first real element (one past the bookkeeping
/// slot), cast to void*, or an invalid Address if the clause is empty.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime element count: product of all iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the bookkeeping element, then scale by the (aligned) record
    // size to get the allocation size in bytes.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Constant element count: size of kmp_depend_info[NumDependencies + 1].
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address::deprecated(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the array starting at index 1 (index 0 is the bookkeeping slot);
  // use a runtime counter when an iterator expression is present.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real element.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5056 
/// Emits code for 'omp depobj ... destroy': frees the heap allocation made by
/// emitDepobjDependClause via __kmpc_free. The pointer stored in the depobj
/// points one element past the allocation start (the bookkeeping slot), so
/// the GEP by -1 recovers the original allocation address before freeing.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the kmp_depend_info* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back to the allocation start (element -1 holds the size record).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5083 
/// Emits code for 'omp depobj ... update(kind)': iterates over every
/// kmp_depend_info element stored in the depobj and rewrites its flags field
/// with the runtime encoding of \p NewDepKind. The loop is emitted by hand as
/// a do-while over a pointer PHI from the first element to deps + NumDeps.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // Runtime element count and base lvalue of the depobj's array.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI holds the current element pointer; second incoming edge is added
  // below once the increment is emitted.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop back until the incremented pointer reaches End.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5130 
/// Emits a call sequence for an OpenMP 'task' directive: allocates and
/// initializes the task via emitTaskInit, builds the dependency array (if
/// any), and then either enqueues the task (__kmpc_omp_task /
/// __kmpc_omp_task_with_deps) or, when \p IfCond evaluates to false at
/// runtime, executes it immediately inline (wait on deps, then
/// task_begin_if0 / proxy entry call / task_complete_if0).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Codegen for the 'if(true)' (or no 'if' clause) path: enqueue the task.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Codegen for the 'if(false)' path: run the task body inline ("if0" path).
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: always take the enqueue path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5248 
/// Emits a call sequence for an OpenMP 'taskloop' directive: allocates and
/// initializes the task via emitTaskInit, stores loop bounds, stride and the
/// reductions pointer into the task record, and then calls __kmpc_taskloop.
/// Unlike emitTaskCall, the 'if' clause is passed to the runtime as an
/// integer argument rather than generating two code paths.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: evaluated 'if' clause condition, or 1 when absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the lower bound into the task record.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the upper bound into the task record.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the stride into the task record.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // 'sched' argument encoding: Data.Schedule's pointer selects whether a
  // schedule was given; its int selects num_tasks vs grainsize.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5334 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Optional 'x' part of an atomic update, forwarded unchanged to
/// \p RedOpGen (used by the atomic reduction codegen path).
/// \param EExpr Optional 'expr' part of an atomic update, forwarded unchanged
/// to \p RedOpGen.
/// \param UpExpr Optional full update expression, forwarded unchanged to
/// \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Branch straight to DoneBB when the array is empty (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointer from one
  // iteration to the next (second incoming edge is added after the body).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent = Address::deprecated(
      RHSElementPHI,
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent = Address::deprecated(
      LHSElementPHI,
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so that
  // RedOpGen combines a single pair of elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5417 
5418 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5419 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5420 /// UDR combiner function.
5421 static void emitReductionCombiner(CodeGenFunction &CGF,
5422                                   const Expr *ReductionOp) {
5423   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5424     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5425       if (const auto *DRE =
5426               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5427         if (const auto *DRD =
5428                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5429           std::pair<llvm::Function *, llvm::Function *> Reduction =
5430               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5431           RValue Func = RValue::get(Reduction.first);
5432           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5433           CGF.EmitIgnoredExpr(ReductionOp);
5434           return;
5435         }
5436   CGF.EmitIgnoredExpr(ReductionOp);
5437 }
5438 
/// Emits the outlined function passed to __kmpc_reduce{_nowait} that combines
/// two lists of reduction items:
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg) {
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); ...
/// }
/// \endcode
/// \param ArgsType Pointer-to-array type (void*[n]*) both void* arguments are
/// cast to before indexing.
/// \param Privates Private copies of the reduction items (used for their
/// types, including VLA size recovery).
/// \param LHSExprs/RHSExprs DeclRefExprs naming the lhs/rhs variables that
/// each ReductionOps[i] combines; they are remapped onto the array slots.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
      CGF.getPointerAlign());
  Address RHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
      CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each lhs/rhs variable to the corresponding slot of the incoming
  // arrays. Idx may run ahead of I because VLA items occupy an extra slot
  // holding their dynamic size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The next array slot carries the VLA's element count encoded as a
      // pointer; bind it to the VLA size expression before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner with lhs/rhs now pointing into the argument arrays.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5532 
5533 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5534                                                   const Expr *ReductionOp,
5535                                                   const Expr *PrivateRef,
5536                                                   const DeclRefExpr *LHS,
5537                                                   const DeclRefExpr *RHS) {
5538   if (PrivateRef->getType()->isArrayType()) {
5539     // Emit reduction for array section.
5540     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5541     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5542     EmitOMPAggregateReduction(
5543         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5544         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5545           emitReductionCombiner(CGF, ReductionOp);
5546         });
5547   } else {
5548     // Emit reduction for array subscript or single variable.
5549     emitReductionCombiner(CGF, ReductionOp);
5550   }
5551 }
5552 
/// Emits code for the OpenMP 'reduction' clause: either a plain per-item
/// combination (Options.SimpleReduction) or the full __kmpc_reduce{_nowait}
/// protocol with a tree-combine path (case 1) and an atomic/critical fallback
/// path (case 2).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: combine each item in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // VLA items use an extra slot carrying the element count, encoded as a
      // void* via inttoptr.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic combination of each item; the CommonActionTy below appends the
  // matching __kmpc_end_reduce{_nowait} call on exit.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Each item is combined either as a simple atomic update (when the combiner
  // matches 'x = x op expr') or inside a named critical region otherwise.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback when no direct atomic RMW is possible: stash the
                // loaded value in a temporary remapped to VD and re-emit the
                // update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5859 
5860 /// Generates unique name for artificial threadprivate variables.
5861 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5862 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5863                                       const Expr *Ref) {
5864   SmallString<256> Buffer;
5865   llvm::raw_svector_ostream Out(Buffer);
5866   const clang::DeclRefExpr *DE;
5867   const VarDecl *D = ::getBaseDecl(Ref, DE);
5868   if (!D)
5869     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5870   D = D->getCanonicalDecl();
5871   std::string Name = CGM.getOpenMPRuntime().getName(
5872       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5873   Out << Prefix << Name << "_"
5874       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5875   return std::string(Out.str());
5876 }
5877 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item this initializer handles.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5941 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item this combiner handles.
/// \param ReductionOp Combiner expression to emit.
/// \param LHS/RHS DeclRefExprs for the lhs/rhs variables used by
/// \p ReductionOp; they are remapped onto the function arguments.
/// \param PrivateRef Private copy of the reduction item (used for its type).
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6019 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item requires no cleanups, so the
/// caller stores a null pointer into the reduce_fini slot instead.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed if the N-th reduction item has no cleanups
  // (e.g. a trivially destructible type).
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single parameter: void* %arg, the private copy to be destroyed.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = bitcast void* %arg to <type>* : load the typed private address out
  // of the void* argument.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6068 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one kmp_taskred_input_t array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size in chars>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null if the item needs no cleanups).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; (1 signals delayed creation for VLAs/sections).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6197 
6198 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6199                                             SourceLocation Loc,
6200                                             bool IsWorksharingReduction) {
6201   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6202   // is_ws, int num, void *data);
6203   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6204   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6205                                                 CGM.IntTy, /*isSigned=*/true);
6206   llvm::Value *Args[] = {IdentTLoc, GTid,
6207                          llvm::ConstantInt::get(CGM.IntTy,
6208                                                 IsWorksharingReduction ? 1 : 0,
6209                                                 /*isSigned=*/true)};
6210   (void)CGF.EmitRuntimeCall(
6211       OMPBuilder.getOrCreateRuntimeFunction(
6212           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6213       Args);
6214 }
6215 
6216 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6217                                               SourceLocation Loc,
6218                                               ReductionCodeGen &RCG,
6219                                               unsigned N) {
6220   auto Sizes = RCG.getSizes(N);
6221   // Emit threadprivate global variable if the type is non-constant
6222   // (Sizes.second = nullptr).
6223   if (Sizes.second) {
6224     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6225                                                      /*isSigned=*/false);
6226     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6227         CGF, CGM.getContext().getSizeType(),
6228         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6229     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6230   }
6231 }
6232 
6233 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6234                                               SourceLocation Loc,
6235                                               llvm::Value *ReductionsPtr,
6236                                               LValue SharedLVal) {
6237   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6238   // *d);
6239   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6240                                                    CGM.IntTy,
6241                                                    /*isSigned=*/true),
6242                          ReductionsPtr,
6243                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6244                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6245   return Address::deprecated(
6246       CGF.EmitRuntimeCall(
6247           OMPBuilder.getOrCreateRuntimeFunction(
6248               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6249           Args),
6250       SharedLVal.getAlignment());
6251 }
6252 
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the 'depend' clause items (if any) into the array layout
    // the runtime expects.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); // ndeps_noalias
      DepWaitTaskArgs[5] =
          llvm::ConstantPointerNull::get(CGF.VoidPtrTy); // noalias_dep_list

      // Scope so that any cleanups from the dependence array run right after
      // the runtime call.
      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // When inside an OpenMP region, emit the untied-task switch point since
  // taskwait is a task scheduling point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6303 
6304 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6305                                            OpenMPDirectiveKind InnerKind,
6306                                            const RegionCodeGenTy &CodeGen,
6307                                            bool HasCancel) {
6308   if (!CGF.HaveInsertPoint())
6309     return;
6310   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6311                                  InnerKind != OMPD_critical &&
6312                                      InnerKind != OMPD_master &&
6313                                      InnerKind != OMPD_masked);
6314   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6315 }
6316 
namespace {
/// Cancellation-kind values passed as the 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel 'parallel' region
  CancelLoop = 2,      // cancel worksharing loop
  CancelSections = 3,  // cancel 'sections' region
  CancelTaskgroup = 4  // cancel 'taskgroup' region
};
} // anonymous namespace
6326 
6327 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6328   RTCancelKind CancelKind = CancelNoreq;
6329   if (CancelRegion == OMPD_parallel)
6330     CancelKind = CancelParallel;
6331   else if (CancelRegion == OMPD_for)
6332     CancelKind = CancelLoop;
6333   else if (CancelRegion == OMPD_sections)
6334     CancelKind = CancelSections;
6335   else {
6336     assert(CancelRegion == OMPD_taskgroup);
6337     CancelKind = CancelTaskgroup;
6338   }
6339   return CancelKind;
6340 }
6341 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result means cancellation was activated.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6381 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The cancel itself, emitted either unconditionally or under the 'if'
    // clause condition below.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result means cancellation was activated.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: only cancel when the condition is true.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6427 
6428 namespace {
6429 /// Cleanup action for uses_allocators support.
6430 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6431   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6432 
6433 public:
6434   OMPUsesAllocatorsActionTy(
6435       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6436       : Allocators(Allocators) {}
6437   void Enter(CodeGenFunction &CGF) override {
6438     if (!CGF.HaveInsertPoint())
6439       return;
6440     for (const auto &AllocatorData : Allocators) {
6441       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6442           CGF, AllocatorData.first, AllocatorData.second);
6443     }
6444   }
6445   void Exit(CodeGenFunction &CGF) override {
6446     if (!CGF.HaveInsertPoint())
6447       return;
6448     for (const auto &AllocatorData : Allocators) {
6449       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6450                                                         AllocatorData.first);
6451     }
6452   }
6453 };
6454 } // namespace
6455 
6456 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6457     const OMPExecutableDirective &D, StringRef ParentName,
6458     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6459     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6460   assert(!ParentName.empty() && "Invalid target region parent name!");
6461   HasEmittedTargetRegion = true;
6462   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6463   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6464     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6465       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6466       if (!D.AllocatorTraits)
6467         continue;
6468       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6469     }
6470   }
6471   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6472   CodeGen.setAction(UsesAllocatorAction);
6473   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6474                                    IsOffloadEntry, CodeGen);
6475 }
6476 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array bound of the traits argument.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as void** for the runtime call.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  // allocator = __kmpc_init_allocator(gtid, memspace, ntraits, traits);
  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator: emit the allocator variable itself, then write the
  // handle returned by the runtime into it (converted from void*).
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6511 
6512 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6513                                              const Expr *Allocator) {
6514   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6515   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6516   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6517   llvm::Value *AllocatorVal =
6518       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6519   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6520                                           CGF.getContext().VoidPtrTy,
6521                                           Allocator->getExprLoc());
6522   (void)CGF.EmitRuntimeCall(
6523       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6524                                             OMPRTL___kmpc_destroy_allocator),
6525       {ThreadId, AllocatorVal});
6526 }
6527 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region into a function with the name built
  // above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a uniquely named constant global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6612 
6613 /// Checks if the expression is constant or does not have non-trivial function
6614 /// calls.
6615 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6616   // We can skip constant expressions.
6617   // We can skip expressions with trivial calls or simple expressions.
6618   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6619           !E->hasNonTrivialCall(Ctx)) &&
6620          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6621 }
6622 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Walk through (possibly nested) compound statements looking for the single
  // "meaningful" child statement. Trivial expressions, ignorable statements
  // and harmless declarations are skipped; if more than one meaningful child
  // is found at any level, return nullptr.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Constant or side-effect-free expressions can be skipped.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is ignorable if every declaration in it is.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Unused or global variables do not count as meaningful.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Recurse into the single child in case it is itself a compound.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6664 
6665 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6666     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6667     int32_t &DefaultVal) {
6668 
6669   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6670   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6671          "Expected target-based executable directive.");
6672   switch (DirectiveKind) {
6673   case OMPD_target: {
6674     const auto *CS = D.getInnermostCapturedStmt();
6675     const auto *Body =
6676         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6677     const Stmt *ChildStmt =
6678         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6679     if (const auto *NestedDir =
6680             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6681       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6682         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6683           const Expr *NumTeams =
6684               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6685           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6686             if (auto Constant =
6687                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6688               DefaultVal = Constant->getExtValue();
6689           return NumTeams;
6690         }
6691         DefaultVal = 0;
6692         return nullptr;
6693       }
6694       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6695           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6696         DefaultVal = 1;
6697         return nullptr;
6698       }
6699       DefaultVal = 1;
6700       return nullptr;
6701     }
6702     // A value of -1 is used to check if we need to emit no teams region
6703     DefaultVal = -1;
6704     return nullptr;
6705   }
6706   case OMPD_target_teams:
6707   case OMPD_target_teams_distribute:
6708   case OMPD_target_teams_distribute_simd:
6709   case OMPD_target_teams_distribute_parallel_for:
6710   case OMPD_target_teams_distribute_parallel_for_simd: {
6711     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6712       const Expr *NumTeams =
6713           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6714       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6715         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6716           DefaultVal = Constant->getExtValue();
6717       return NumTeams;
6718     }
6719     DefaultVal = 0;
6720     return nullptr;
6721   }
6722   case OMPD_target_parallel:
6723   case OMPD_target_parallel_for:
6724   case OMPD_target_parallel_for_simd:
6725   case OMPD_target_simd:
6726     DefaultVal = 1;
6727     return nullptr;
6728   case OMPD_parallel:
6729   case OMPD_for:
6730   case OMPD_parallel_for:
6731   case OMPD_parallel_master:
6732   case OMPD_parallel_sections:
6733   case OMPD_for_simd:
6734   case OMPD_parallel_for_simd:
6735   case OMPD_cancel:
6736   case OMPD_cancellation_point:
6737   case OMPD_ordered:
6738   case OMPD_threadprivate:
6739   case OMPD_allocate:
6740   case OMPD_task:
6741   case OMPD_simd:
6742   case OMPD_tile:
6743   case OMPD_unroll:
6744   case OMPD_sections:
6745   case OMPD_section:
6746   case OMPD_single:
6747   case OMPD_master:
6748   case OMPD_critical:
6749   case OMPD_taskyield:
6750   case OMPD_barrier:
6751   case OMPD_taskwait:
6752   case OMPD_taskgroup:
6753   case OMPD_atomic:
6754   case OMPD_flush:
6755   case OMPD_depobj:
6756   case OMPD_scan:
6757   case OMPD_teams:
6758   case OMPD_target_data:
6759   case OMPD_target_exit_data:
6760   case OMPD_target_enter_data:
6761   case OMPD_distribute:
6762   case OMPD_distribute_simd:
6763   case OMPD_distribute_parallel_for:
6764   case OMPD_distribute_parallel_for_simd:
6765   case OMPD_teams_distribute:
6766   case OMPD_teams_distribute_simd:
6767   case OMPD_teams_distribute_parallel_for:
6768   case OMPD_teams_distribute_parallel_for_simd:
6769   case OMPD_target_update:
6770   case OMPD_declare_simd:
6771   case OMPD_declare_variant:
6772   case OMPD_begin_declare_variant:
6773   case OMPD_end_declare_variant:
6774   case OMPD_declare_target:
6775   case OMPD_end_declare_target:
6776   case OMPD_declare_reduction:
6777   case OMPD_declare_mapper:
6778   case OMPD_taskloop:
6779   case OMPD_taskloop_simd:
6780   case OMPD_master_taskloop:
6781   case OMPD_master_taskloop_simd:
6782   case OMPD_parallel_master_taskloop:
6783   case OMPD_parallel_master_taskloop_simd:
6784   case OMPD_requires:
6785   case OMPD_metadirective:
6786   case OMPD_unknown:
6787     break;
6788   default:
6789     break;
6790   }
6791   llvm_unreachable("Unexpected directive kind.");
6792 }
6793 
/// Emit an i32 number-of-teams value for the given target directive, or
/// nullptr when no teams region needs to be emitted at all.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  // DefaultNT stays -1 unless getNumTeamsExprForTargetDirective computes a
  // statically-known team count (see that function for the encoding).
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The num_teams expression came from a nested teams directive: emit it
      // in the context of the captured statement so captured variables
      // resolve correctly.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined target+teams: the clause belongs to this directive itself.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // No teams region is required.
    return nullptr;
  }

  // Statically-known team count (0 or 1 from the helper's encoding).
  return Bld.getInt32(DefaultNT);
}
6835 
/// Compute the thread count for the single parallel/simd region nested inside
/// captured statement \p CS, clamped by \p DefaultThreadLimitVal when that is
/// non-null. May return \p DefaultThreadLimitVal itself (possibly nullptr)
/// when the nested directive is neither parallel nor simd.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an if clause with no name modifier or the 'parallel' modifier
        // applies to the parallel region.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs sequentially.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Emit pre-init declarations for the condition before evaluating
            // it at runtime.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Allocate storage only; the initializer is skipped.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate storage only; the initializer is skipped.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the enclosing thread_limit, if any:
        // min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread limit or 0
        // (runtime default).
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region always runs with one thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No nested directive found: use the thread limit or 0 (runtime default).
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6927 
/// Determine the thread-limit expression (if any) for a target-based
/// directive and compute a statically-known thread count in \p DefaultVal
/// when the relevant clauses are integer constant expressions.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Both thread_limit and num_threads may apply; prefer the smaller
    // constant when both are statically known.
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): this comparison uses whatever DefaultVal held on
          // entry when no constant thread_limit was seen — verify callers
          // initialize DefaultVal accordingly.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd regions run with a single thread.
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  // Unlike the num_teams variant there is no OMPD_metadirective case here;
  // it is caught by the default below with identical effect.
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7049 
/// Emit an i32 thread count for the given target directive, combining
/// thread_limit / num_threads / if clauses (and clauses on nested
/// directives). A value of 0 means "use the runtime default".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Look for clauses on the nested directive(s) of the target region.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // thread_limit on a nested (teams) directive.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate storage only; the initializer is skipped.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step into a standalone teams directive to find a nested distribute
      // or parallel region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region runs with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested distribute may itself contain the parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an if clause with no name modifier or the 'parallel' modifier
      // applies here.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: sequential execution with one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Result is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Apply the runtime-evaluated if condition last.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd regions run with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  // All non-target directives were rejected by the assert above.
  llvm_unreachable("Unsupported directive kind.");
}
7268 
7269 namespace {
7270 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7271 
7272 // Utility to handle information from clauses associated with a given
7273 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7274 // It provides a convenient interface to obtain the information and generate
7275 // code for that information.
7276 class MappableExprsHandler {
7277 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values expected by
  /// the offloading runtime library.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because they
    /// are inherently structured.  It is not intended to be used on 'target
    /// enter data' and 'target exit data' directives because they are inherently
    /// dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7333 
7334   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7335   static unsigned getFlagMemberOffset() {
7336     unsigned Offset = 0;
7337     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7338          Remain = Remain >> 1)
7339       Offset++;
7340     return Offset;
7341   }
7342 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library. Both members are optional and may be null.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
7359 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library. Dereferencing (operator*) yields the pointer itself.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7376 
  /// Convenience aliases for the parallel arrays describing map entries:
  /// index i of each array refers to the same map entry.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7384 
7385   /// This structure contains combined information generated for mappable
7386   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7387   /// mappers, and non-contiguous information.
7388   struct MapCombinedInfoTy {
7389     struct StructNonContiguousInfo {
7390       bool IsNonContiguous = false;
7391       MapDimArrayTy Dims;
7392       MapNonContiguousArrayTy Offsets;
7393       MapNonContiguousArrayTy Counts;
7394       MapNonContiguousArrayTy Strides;
7395     };
7396     MapExprsArrayTy Exprs;
7397     MapBaseValuesArrayTy BasePointers;
7398     MapValuesArrayTy Pointers;
7399     MapValuesArrayTy Sizes;
7400     MapFlagsArrayTy Types;
7401     MapMappersArrayTy Mappers;
7402     StructNonContiguousInfo NonContigInfo;
7403 
7404     /// Append arrays in \a CurInfo.
7405     void append(MapCombinedInfoTy &CurInfo) {
7406       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7407       BasePointers.append(CurInfo.BasePointers.begin(),
7408                           CurInfo.BasePointers.end());
7409       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7410       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7411       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7412       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7413       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7414                                  CurInfo.NonContigInfo.Dims.end());
7415       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7416                                     CurInfo.NonContigInfo.Offsets.end());
7417       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7418                                    CurInfo.NonContigInfo.Counts.end());
7419       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7420                                     CurInfo.NonContigInfo.Strides.end());
7421     }
7422   };
7423 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map information collected for the struct before its individual member
    /// entries are emitted.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: {field index, element address}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: {field index, element address}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct; Address::invalid() until computed.
    Address Base = Address::invalid();
    /// Lower-bound address of the mapped range; Address::invalid() until
    /// computed.
    Address LB = Address::invalid();
    /// NOTE(review): appears to record that an array section participates in
    /// the mapped range — confirm against the users of this struct.
    bool IsArraySection = false;
    /// True if the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
7439 
7440 private:
  /// All the information describing a single mappable-expression component
  /// list, including how a device pointer has to be returned for it.
  struct MapInfo {
    /// Expression components of the mappable expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the clause ('to', 'from', 'tofrom', ...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers ('always', 'close', 'present', 'ompx_hold', ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from 'to'/'from' clauses (e.g. 'present').
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if a device pointer has to be returned for this expression.
    bool ReturnDevicePointer = false;
    /// True if the map is implicit rather than explicitly written by the user.
    bool IsImplicit = false;
    /// User-defined mapper associated with this map, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable-reference expression; NOTE(review): presumably kept
    /// for naming/diagnosing the mapping — confirm against users of MapInfo.
    const Expr *VarRef = nullptr;
    /// True when this entry stems from use_device_addr rather than
    /// use_device_ptr handling (cf. DeferredDevicePtrEntryTy).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7467 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The expression whose device address/pointer is requested.
    const Expr *IE = nullptr;
    /// The declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr entries, false for use_device_ptr ones.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7480 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between captured lambda declarations and the map clause that maps
  /// them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7504 
7505   llvm::Value *getExprTypeSize(const Expr *E) const {
7506     QualType ExprTy = E->getType().getCanonicalType();
7507 
7508     // Calculate the size for array shaping expression.
7509     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7510       llvm::Value *Size =
7511           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7512       for (const Expr *SE : OAE->getDimensions()) {
7513         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7514         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7515                                       CGF.getContext().getSizeType(),
7516                                       SE->getExprLoc());
7517         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7518       }
7519       return Size;
7520     }
7521 
7522     // Reference types are ignored for mapping purposes.
7523     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7524       ExprTy = RefTy->getPointeeType().getCanonicalType();
7525 
7526     // Given that an array section is considered a built-in type, we need to
7527     // do the calculation based on the length of the section instead of relying
7528     // on CGF.getTypeSize(E->getType()).
7529     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7530       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7531                             OAE->getBase()->IgnoreParenImpCasts())
7532                             .getCanonicalType();
7533 
7534       // If there is no length associated with the expression and lower bound is
7535       // not specified too, that means we are using the whole length of the
7536       // base.
7537       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7538           !OAE->getLowerBound())
7539         return CGF.getTypeSize(BaseTy);
7540 
7541       llvm::Value *ElemSize;
7542       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7543         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7544       } else {
7545         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7546         assert(ATy && "Expecting array type if not a pointer type.");
7547         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7548       }
7549 
7550       // If we don't have a length at this point, that is because we have an
7551       // array section with a single element.
7552       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7553         return ElemSize;
7554 
7555       if (const Expr *LenExpr = OAE->getLength()) {
7556         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7557         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7558                                              CGF.getContext().getSizeType(),
7559                                              LenExpr->getExprLoc());
7560         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7561       }
7562       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7563              OAE->getLowerBound() && "expected array_section[lb:].");
7564       // Size = sizetype - lb * elemtype;
7565       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7566       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7567       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7568                                        CGF.getContext().getSizeType(),
7569                                        OAE->getLowerBound()->getExprLoc());
7570       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7571       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7572       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7573       LengthVal = CGF.Builder.CreateSelect(
7574           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7575       return LengthVal;
7576     }
7577     return CGF.getTypeSize(ExprTy);
7578   }
7579 
7580   /// Return the corresponding bits for a given map clause modifier. Add
7581   /// a flag marking the map as a pointer if requested. Add a flag marking the
7582   /// map as the first one of a series of maps that relate to the same map
7583   /// expression.
7584   OpenMPOffloadMappingFlags getMapTypeBits(
7585       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7586       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7587       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7588     OpenMPOffloadMappingFlags Bits =
7589         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7590     switch (MapType) {
7591     case OMPC_MAP_alloc:
7592     case OMPC_MAP_release:
7593       // alloc and release is the default behavior in the runtime library,  i.e.
7594       // if we don't pass any bits alloc/release that is what the runtime is
7595       // going to do. Therefore, we don't need to signal anything for these two
7596       // type modifiers.
7597       break;
7598     case OMPC_MAP_to:
7599       Bits |= OMP_MAP_TO;
7600       break;
7601     case OMPC_MAP_from:
7602       Bits |= OMP_MAP_FROM;
7603       break;
7604     case OMPC_MAP_tofrom:
7605       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7606       break;
7607     case OMPC_MAP_delete:
7608       Bits |= OMP_MAP_DELETE;
7609       break;
7610     case OMPC_MAP_unknown:
7611       llvm_unreachable("Unexpected map type!");
7612     }
7613     if (AddPtrFlag)
7614       Bits |= OMP_MAP_PTR_AND_OBJ;
7615     if (AddIsTargetParamFlag)
7616       Bits |= OMP_MAP_TARGET_PARAM;
7617     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7618       Bits |= OMP_MAP_ALWAYS;
7619     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7620       Bits |= OMP_MAP_CLOSE;
7621     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7622         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7623       Bits |= OMP_MAP_PRESENT;
7624     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7625       Bits |= OMP_MAP_OMPX_HOLD;
7626     if (IsNonContiguous)
7627       Bits |= OMP_MAP_NON_CONTIG;
7628     return Bits;
7629   }
7630 
7631   /// Return true if the provided expression is a final array section. A
7632   /// final array section, is one whose length can't be proved to be one.
7633   bool isFinalArraySectionExpression(const Expr *E) const {
7634     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7635 
7636     // It is not an array section and therefore not a unity-size one.
7637     if (!OASE)
7638       return false;
7639 
7640     // An array section with no colon always refer to a single element.
7641     if (OASE->getColonLocFirst().isInvalid())
7642       return false;
7643 
7644     const Expr *Length = OASE->getLength();
7645 
7646     // If we don't have a length we have to check if the array has size 1
7647     // for this dimension. Also, we should always expect a length if the
7648     // base type is pointer.
7649     if (!Length) {
7650       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7651                              OASE->getBase()->IgnoreParenImpCasts())
7652                              .getCanonicalType();
7653       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7654         return ATy->getSize().getSExtValue() != 1;
7655       // If we don't have a constant dimension length, we have to consider
7656       // the current section as having any size, so it is not necessarily
7657       // unitary. If it happen to be unity size, that's user fault.
7658       return true;
7659     }
7660 
7661     // Check if the length evaluates to 1.
7662     Expr::EvalResult Result;
7663     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7664       return true; // Can have more that size 1.
7665 
7666     llvm::APSInt ConstLength = Result.Val.getInt();
7667     return ConstLength.getSExtValue() != 1;
7668   }
7669 
7670   /// Generate the base pointers, section pointers, sizes, map type bits, and
7671   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7672   /// map type, map or motion modifiers, and expression components.
7673   /// \a IsFirstComponent should be set to true if the provided set of
7674   /// components is the first associated with a capture.
7675   void generateInfoForComponentList(
7676       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7677       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7678       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7679       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7680       bool IsFirstComponentList, bool IsImplicit,
7681       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7682       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7683       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7684           OverlappedElements = llvm::None) const {
7685     // The following summarizes what has to be generated for each map and the
7686     // types below. The generated information is expressed in this order:
7687     // base pointer, section pointer, size, flags
7688     // (to add to the ones that come from the map type and modifier).
7689     //
7690     // double d;
7691     // int i[100];
7692     // float *p;
7693     //
7694     // struct S1 {
7695     //   int i;
7696     //   float f[50];
7697     // }
7698     // struct S2 {
7699     //   int i;
7700     //   float f[50];
7701     //   S1 s;
7702     //   double *p;
7703     //   struct S2 *ps;
7704     //   int &ref;
7705     // }
7706     // S2 s;
7707     // S2 *ps;
7708     //
7709     // map(d)
7710     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7711     //
7712     // map(i)
7713     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7714     //
7715     // map(i[1:23])
7716     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7717     //
7718     // map(p)
7719     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7720     //
7721     // map(p[1:24])
7722     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7723     // in unified shared memory mode or for local pointers
7724     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7725     //
7726     // map(s)
7727     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7728     //
7729     // map(s.i)
7730     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7731     //
7732     // map(s.s.f)
7733     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7734     //
7735     // map(s.p)
7736     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7737     //
7738     // map(to: s.p[:22])
7739     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7740     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7741     // &(s.p), &(s.p[0]), 22*sizeof(double),
7742     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7743     // (*) alloc space for struct members, only this is a target parameter
7744     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7745     //      optimizes this entry out, same in the examples below)
7746     // (***) map the pointee (map: to)
7747     //
7748     // map(to: s.ref)
7749     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7750     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7751     // (*) alloc space for struct members, only this is a target parameter
7752     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7753     //      optimizes this entry out, same in the examples below)
7754     // (***) map the pointee (map: to)
7755     //
7756     // map(s.ps)
7757     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7758     //
7759     // map(from: s.ps->s.i)
7760     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7761     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7762     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7763     //
7764     // map(to: s.ps->ps)
7765     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7766     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7767     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7768     //
7769     // map(s.ps->ps->ps)
7770     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7771     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7772     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7773     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7774     //
7775     // map(to: s.ps->ps->s.f[:22])
7776     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7777     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7778     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7779     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7780     //
7781     // map(ps)
7782     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7783     //
7784     // map(ps->i)
7785     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7786     //
7787     // map(ps->s.f)
7788     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7789     //
7790     // map(from: ps->p)
7791     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7792     //
7793     // map(to: ps->p[:22])
7794     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7795     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7796     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7797     //
7798     // map(ps->ps)
7799     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7800     //
7801     // map(from: ps->ps->s.i)
7802     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7803     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7804     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7805     //
7806     // map(from: ps->ps->ps)
7807     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7808     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7809     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7810     //
7811     // map(ps->ps->ps->ps)
7812     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7813     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7814     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7815     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7816     //
7817     // map(to: ps->ps->ps->s.f[:22])
7818     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7819     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7820     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7821     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7822     //
7823     // map(to: s.f[:22]) map(from: s.p[:33])
7824     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7825     //     sizeof(double*) (**), TARGET_PARAM
7826     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7827     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7828     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7829     // (*) allocate contiguous space needed to fit all mapped members even if
7830     //     we allocate space for members not mapped (in this example,
7831     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7832     //     them as well because they fall between &s.f[0] and &s.p)
7833     //
7834     // map(from: s.f[:22]) map(to: ps->p[:33])
7835     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7836     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7837     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7838     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7839     // (*) the struct this entry pertains to is the 2nd element in the list of
7840     //     arguments, hence MEMBER_OF(2)
7841     //
7842     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7843     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7844     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7845     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7846     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7847     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7848     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7849     // (*) the struct this entry pertains to is the 4th element in the list
7850     //     of arguments, hence MEMBER_OF(4)
7851 
7852     // Track if the map information being generated is the first for a capture.
7853     bool IsCaptureFirstInfo = IsFirstComponentList;
7854     // When the variable is on a declare target link or in a to clause with
7855     // unified memory, a reference is needed to hold the host/device address
7856     // of the variable.
7857     bool RequiresReference = false;
7858 
7859     // Scan the components from the base to the complete expression.
7860     auto CI = Components.rbegin();
7861     auto CE = Components.rend();
7862     auto I = CI;
7863 
7864     // Track if the map information being generated is the first for a list of
7865     // components.
7866     bool IsExpressionFirstInfo = true;
7867     bool FirstPointerInComplexData = false;
7868     Address BP = Address::invalid();
7869     const Expr *AssocExpr = I->getAssociatedExpression();
7870     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7871     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7872     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7873 
7874     if (isa<MemberExpr>(AssocExpr)) {
7875       // The base is the 'this' pointer. The content of the pointer is going
7876       // to be the base of the field being mapped.
7877       BP = CGF.LoadCXXThisAddress();
7878     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7879                (OASE &&
7880                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7881       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7882     } else if (OAShE &&
7883                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7884       BP = Address::deprecated(
7885           CGF.EmitScalarExpr(OAShE->getBase()),
7886           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7887     } else {
7888       // The base is the reference to the variable.
7889       // BP = &Var.
7890       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7891       if (const auto *VD =
7892               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7893         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7894                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7895           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7896               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7897                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7898             RequiresReference = true;
7899             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7900           }
7901         }
7902       }
7903 
7904       // If the variable is a pointer and is being dereferenced (i.e. is not
7905       // the last component), the base has to be the pointer itself, not its
7906       // reference. References are ignored for mapping purposes.
7907       QualType Ty =
7908           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7909       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7910         // No need to generate individual map information for the pointer, it
7911         // can be associated with the combined storage if shared memory mode is
7912         // active or the base declaration is not global variable.
7913         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7914         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7915             !VD || VD->hasLocalStorage())
7916           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7917         else
7918           FirstPointerInComplexData = true;
7919         ++I;
7920       }
7921     }
7922 
7923     // Track whether a component of the list should be marked as MEMBER_OF some
7924     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7925     // in a component list should be marked as MEMBER_OF, all subsequent entries
7926     // do not belong to the base struct. E.g.
7927     // struct S2 s;
7928     // s.ps->ps->ps->f[:]
7929     //   (1) (2) (3) (4)
7930     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7931     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7932     // is the pointee of ps(2) which is not member of struct s, so it should not
7933     // be marked as such (it is still PTR_AND_OBJ).
7934     // The variable is initialized to false so that PTR_AND_OBJ entries which
7935     // are not struct members are not considered (e.g. array of pointers to
7936     // data).
7937     bool ShouldBeMemberOf = false;
7938 
7939     // Variable keeping track of whether or not we have encountered a component
7940     // in the component list which is a member expression. Useful when we have a
7941     // pointer or a final array section, in which case it is the previous
7942     // component in the list which tells us whether we have a member expression.
7943     // E.g. X.f[:]
7944     // While processing the final array section "[:]" it is "f" which tells us
7945     // whether we are dealing with a member of a declared struct.
7946     const MemberExpr *EncounteredME = nullptr;
7947 
7948     // Track for the total number of dimension. Start from one for the dummy
7949     // dimension.
7950     uint64_t DimSize = 1;
7951 
7952     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7953     bool IsPrevMemberReference = false;
7954 
7955     for (; I != CE; ++I) {
7956       // If the current component is member of a struct (parent struct) mark it.
7957       if (!EncounteredME) {
7958         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7959         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7960         // as MEMBER_OF the parent struct.
7961         if (EncounteredME) {
7962           ShouldBeMemberOf = true;
7963           // Do not emit as complex pointer if this is actually not array-like
7964           // expression.
7965           if (FirstPointerInComplexData) {
7966             QualType Ty = std::prev(I)
7967                               ->getAssociatedDeclaration()
7968                               ->getType()
7969                               .getNonReferenceType();
7970             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7971             FirstPointerInComplexData = false;
7972           }
7973         }
7974       }
7975 
7976       auto Next = std::next(I);
7977 
7978       // We need to generate the addresses and sizes if this is the last
7979       // component, if the component is a pointer or if it is an array section
7980       // whose length can't be proved to be one. If this is a pointer, it
7981       // becomes the base address for the following components.
7982 
7983       // A final array section, is one whose length can't be proved to be one.
7984       // If the map item is non-contiguous then we don't treat any array section
7985       // as final array section.
7986       bool IsFinalArraySection =
7987           !IsNonContiguous &&
7988           isFinalArraySectionExpression(I->getAssociatedExpression());
7989 
7990       // If we have a declaration for the mapping use that, otherwise use
7991       // the base declaration of the map clause.
7992       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7993                                      ? I->getAssociatedDeclaration()
7994                                      : BaseDecl;
7995       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7996                                                : MapExpr;
7997 
7998       // Get information on whether the element is a pointer. Have to do a
7999       // special treatment for array sections given that they are built-in
8000       // types.
8001       const auto *OASE =
8002           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8003       const auto *OAShE =
8004           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8005       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8006       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8007       bool IsPointer =
8008           OAShE ||
8009           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8010                        .getCanonicalType()
8011                        ->isAnyPointerType()) ||
8012           I->getAssociatedExpression()->getType()->isAnyPointerType();
8013       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8014                                MapDecl &&
8015                                MapDecl->getType()->isLValueReferenceType();
8016       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8017 
8018       if (OASE)
8019         ++DimSize;
8020 
8021       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8022           IsFinalArraySection) {
8023         // If this is not the last component, we expect the pointer to be
8024         // associated with an array expression or member expression.
8025         assert((Next == CE ||
8026                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8027                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8028                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8029                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8030                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8031                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8032                "Unexpected expression");
8033 
8034         Address LB = Address::invalid();
8035         Address LowestElem = Address::invalid();
8036         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8037                                        const MemberExpr *E) {
8038           const Expr *BaseExpr = E->getBase();
8039           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8040           // scalar.
8041           LValue BaseLV;
8042           if (E->isArrow()) {
8043             LValueBaseInfo BaseInfo;
8044             TBAAAccessInfo TBAAInfo;
8045             Address Addr =
8046                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8047             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8048             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8049           } else {
8050             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8051           }
8052           return BaseLV;
8053         };
8054         if (OAShE) {
8055           LowestElem = LB =
8056               Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()),
8057                                   CGF.getContext().getTypeAlignInChars(
8058                                       OAShE->getBase()->getType()));
8059         } else if (IsMemberReference) {
8060           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8061           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8062           LowestElem = CGF.EmitLValueForFieldInitialization(
8063                               BaseLVal, cast<FieldDecl>(MapDecl))
8064                            .getAddress(CGF);
8065           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8066                    .getAddress(CGF);
8067         } else {
8068           LowestElem = LB =
8069               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8070                   .getAddress(CGF);
8071         }
8072 
8073         // If this component is a pointer inside the base struct then we don't
8074         // need to create any entry for it - it will be combined with the object
8075         // it is pointing to into a single PTR_AND_OBJ entry.
8076         bool IsMemberPointerOrAddr =
8077             EncounteredME &&
8078             (((IsPointer || ForDeviceAddr) &&
8079               I->getAssociatedExpression() == EncounteredME) ||
8080              (IsPrevMemberReference && !IsPointer) ||
8081              (IsMemberReference && Next != CE &&
8082               !Next->getAssociatedExpression()->getType()->isPointerType()));
8083         if (!OverlappedElements.empty() && Next == CE) {
8084           // Handle base element with the info for overlapped elements.
8085           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8086           assert(!IsPointer &&
8087                  "Unexpected base element with the pointer type.");
8088           // Mark the whole struct as the struct that requires allocation on the
8089           // device.
8090           PartialStruct.LowestElem = {0, LowestElem};
8091           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8092               I->getAssociatedExpression()->getType());
8093           Address HB = CGF.Builder.CreateConstGEP(
8094               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8095                                                               CGF.VoidPtrTy),
8096               TypeSize.getQuantity() - 1);
8097           PartialStruct.HighestElem = {
8098               std::numeric_limits<decltype(
8099                   PartialStruct.HighestElem.first)>::max(),
8100               HB};
8101           PartialStruct.Base = BP;
8102           PartialStruct.LB = LB;
8103           assert(
8104               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8105               "Overlapped elements must be used only once for the variable.");
8106           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8107           // Emit data for non-overlapped data.
8108           OpenMPOffloadMappingFlags Flags =
8109               OMP_MAP_MEMBER_OF |
8110               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8111                              /*AddPtrFlag=*/false,
8112                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8113           llvm::Value *Size = nullptr;
8114           // Do bitcopy of all non-overlapped structure elements.
8115           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8116                    Component : OverlappedElements) {
8117             Address ComponentLB = Address::invalid();
8118             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8119                  Component) {
8120               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8121                 const auto *FD = dyn_cast<FieldDecl>(VD);
8122                 if (FD && FD->getType()->isLValueReferenceType()) {
8123                   const auto *ME =
8124                       cast<MemberExpr>(MC.getAssociatedExpression());
8125                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8126                   ComponentLB =
8127                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8128                           .getAddress(CGF);
8129                 } else {
8130                   ComponentLB =
8131                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8132                           .getAddress(CGF);
8133                 }
8134                 Size = CGF.Builder.CreatePtrDiff(
8135                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8136                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8137                 break;
8138               }
8139             }
8140             assert(Size && "Failed to determine structure size");
8141             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8142             CombinedInfo.BasePointers.push_back(BP.getPointer());
8143             CombinedInfo.Pointers.push_back(LB.getPointer());
8144             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8145                 Size, CGF.Int64Ty, /*isSigned=*/true));
8146             CombinedInfo.Types.push_back(Flags);
8147             CombinedInfo.Mappers.push_back(nullptr);
8148             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8149                                                                       : 1);
8150             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8151           }
8152           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8153           CombinedInfo.BasePointers.push_back(BP.getPointer());
8154           CombinedInfo.Pointers.push_back(LB.getPointer());
8155           Size = CGF.Builder.CreatePtrDiff(
8156               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8157               CGF.EmitCastToVoidPtr(LB.getPointer()));
8158           CombinedInfo.Sizes.push_back(
8159               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8160           CombinedInfo.Types.push_back(Flags);
8161           CombinedInfo.Mappers.push_back(nullptr);
8162           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8163                                                                     : 1);
8164           break;
8165         }
8166         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8167         if (!IsMemberPointerOrAddr ||
8168             (Next == CE && MapType != OMPC_MAP_unknown)) {
8169           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8170           CombinedInfo.BasePointers.push_back(BP.getPointer());
8171           CombinedInfo.Pointers.push_back(LB.getPointer());
8172           CombinedInfo.Sizes.push_back(
8173               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8174           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8175                                                                     : 1);
8176 
8177           // If Mapper is valid, the last component inherits the mapper.
8178           bool HasMapper = Mapper && Next == CE;
8179           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8180 
8181           // We need to add a pointer flag for each map that comes from the
8182           // same expression except for the first one. We also need to signal
8183           // this map is the first one that relates with the current capture
8184           // (there is a set of entries for each capture).
8185           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8186               MapType, MapModifiers, MotionModifiers, IsImplicit,
8187               !IsExpressionFirstInfo || RequiresReference ||
8188                   FirstPointerInComplexData || IsMemberReference,
8189               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8190 
8191           if (!IsExpressionFirstInfo || IsMemberReference) {
8192             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8193             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8194             if (IsPointer || (IsMemberReference && Next != CE))
8195               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8196                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8197 
8198             if (ShouldBeMemberOf) {
8199               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8200               // should be later updated with the correct value of MEMBER_OF.
8201               Flags |= OMP_MAP_MEMBER_OF;
8202               // From now on, all subsequent PTR_AND_OBJ entries should not be
8203               // marked as MEMBER_OF.
8204               ShouldBeMemberOf = false;
8205             }
8206           }
8207 
8208           CombinedInfo.Types.push_back(Flags);
8209         }
8210 
8211         // If we have encountered a member expression so far, keep track of the
8212         // mapped member. If the parent is "*this", then the value declaration
8213         // is nullptr.
8214         if (EncounteredME) {
8215           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8216           unsigned FieldIndex = FD->getFieldIndex();
8217 
8218           // Update info about the lowest and highest elements for this struct
8219           if (!PartialStruct.Base.isValid()) {
8220             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8221             if (IsFinalArraySection) {
8222               Address HB =
8223                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8224                       .getAddress(CGF);
8225               PartialStruct.HighestElem = {FieldIndex, HB};
8226             } else {
8227               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8228             }
8229             PartialStruct.Base = BP;
8230             PartialStruct.LB = BP;
8231           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8232             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8233           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8234             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8235           }
8236         }
8237 
8238         // Need to emit combined struct for array sections.
8239         if (IsFinalArraySection || IsNonContiguous)
8240           PartialStruct.IsArraySection = true;
8241 
8242         // If we have a final array section, we are done with this expression.
8243         if (IsFinalArraySection)
8244           break;
8245 
8246         // The pointer becomes the base for the next element.
8247         if (Next != CE)
8248           BP = IsMemberReference ? LowestElem : LB;
8249 
8250         IsExpressionFirstInfo = false;
8251         IsCaptureFirstInfo = false;
8252         FirstPointerInComplexData = false;
8253         IsPrevMemberReference = IsMemberReference;
8254       } else if (FirstPointerInComplexData) {
8255         QualType Ty = Components.rbegin()
8256                           ->getAssociatedDeclaration()
8257                           ->getType()
8258                           .getNonReferenceType();
8259         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8260         FirstPointerInComplexData = false;
8261       }
8262     }
8263     // If ran into the whole component - allocate the space for the whole
8264     // record.
8265     if (!EncounteredME)
8266       PartialStruct.HasCompleteRecord = true;
8267 
8268     if (!IsNonContiguous)
8269       return;
8270 
8271     const ASTContext &Context = CGF.getContext();
8272 
8273     // For supporting stride in array section, we need to initialize the first
8274     // dimension size as 1, first offset as 0, and first count as 1
8275     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8276     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8277     MapValuesArrayTy CurStrides;
8278     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8279     uint64_t ElementTypeSize;
8280 
8281     // Collect Size information for each dimension and get the element size as
8282     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8284     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8285          Components) {
8286       const Expr *AssocExpr = Component.getAssociatedExpression();
8287       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8288 
8289       if (!OASE)
8290         continue;
8291 
8292       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8293       auto *CAT = Context.getAsConstantArrayType(Ty);
8294       auto *VAT = Context.getAsVariableArrayType(Ty);
8295 
8296       // We need all the dimension size except for the last dimension.
8297       assert((VAT || CAT || &Component == &*Components.begin()) &&
8298              "Should be either ConstantArray or VariableArray if not the "
8299              "first Component");
8300 
8301       // Get element size if CurStrides is empty.
8302       if (CurStrides.empty()) {
8303         const Type *ElementType = nullptr;
8304         if (CAT)
8305           ElementType = CAT->getElementType().getTypePtr();
8306         else if (VAT)
8307           ElementType = VAT->getElementType().getTypePtr();
8308         else
8309           assert(&Component == &*Components.begin() &&
8310                  "Only expect pointer (non CAT or VAT) when this is the "
8311                  "first Component");
8312         // If ElementType is null, then it means the base is a pointer
8313         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8314         // for next iteration.
8315         if (ElementType) {
8316           // For the case that having pointer as base, we need to remove one
8317           // level of indirection.
8318           if (&Component != &*Components.begin())
8319             ElementType = ElementType->getPointeeOrArrayElementType();
8320           ElementTypeSize =
8321               Context.getTypeSizeInChars(ElementType).getQuantity();
8322           CurStrides.push_back(
8323               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8324         }
8325       }
8326       // Get dimension value except for the last dimension since we don't need
8327       // it.
8328       if (DimSizes.size() < Components.size() - 1) {
8329         if (CAT)
8330           DimSizes.push_back(llvm::ConstantInt::get(
8331               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8332         else if (VAT)
8333           DimSizes.push_back(CGF.Builder.CreateIntCast(
8334               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8335               /*IsSigned=*/false));
8336       }
8337     }
8338 
    // Skip the dummy dimension since we already have its information.
8340     auto DI = DimSizes.begin() + 1;
8341     // Product of dimension.
8342     llvm::Value *DimProd =
8343         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8344 
8345     // Collect info for non-contiguous. Notice that offset, count, and stride
8346     // are only meaningful for array-section, so we insert a null for anything
8347     // other than array-section.
8348     // Also, the size of offset, count, and stride are not the same as
8349     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8350     // count, and stride are the same as the number of non-contiguous
8351     // declaration in target update to/from clause.
8352     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8353          Components) {
8354       const Expr *AssocExpr = Component.getAssociatedExpression();
8355 
8356       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8357         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8358             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8359             /*isSigned=*/false);
8360         CurOffsets.push_back(Offset);
8361         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8362         CurStrides.push_back(CurStrides.back());
8363         continue;
8364       }
8365 
8366       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8367 
8368       if (!OASE)
8369         continue;
8370 
8371       // Offset
8372       const Expr *OffsetExpr = OASE->getLowerBound();
8373       llvm::Value *Offset = nullptr;
8374       if (!OffsetExpr) {
8375         // If offset is absent, then we just set it to zero.
8376         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8377       } else {
8378         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8379                                            CGF.Int64Ty,
8380                                            /*isSigned=*/false);
8381       }
8382       CurOffsets.push_back(Offset);
8383 
8384       // Count
8385       const Expr *CountExpr = OASE->getLength();
8386       llvm::Value *Count = nullptr;
8387       if (!CountExpr) {
8388         // In Clang, once a high dimension is an array section, we construct all
8389         // the lower dimension as array section, however, for case like
8390         // arr[0:2][2], Clang construct the inner dimension as an array section
8391         // but it actually is not in an array section form according to spec.
8392         if (!OASE->getColonLocFirst().isValid() &&
8393             !OASE->getColonLocSecond().isValid()) {
8394           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8395         } else {
8396           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8397           // When the length is absent it defaults to ⌈(size −
8398           // lower-bound)/stride⌉, where size is the size of the array
8399           // dimension.
8400           const Expr *StrideExpr = OASE->getStride();
8401           llvm::Value *Stride =
8402               StrideExpr
8403                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8404                                               CGF.Int64Ty, /*isSigned=*/false)
8405                   : nullptr;
8406           if (Stride)
8407             Count = CGF.Builder.CreateUDiv(
8408                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8409           else
8410             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8411         }
8412       } else {
8413         Count = CGF.EmitScalarExpr(CountExpr);
8414       }
8415       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8416       CurCounts.push_back(Count);
8417 
8418       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8419       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8420       //              Offset      Count     Stride
8421       //    D0          0           1         4    (int)    <- dummy dimension
8422       //    D1          0           2         8    (2 * (1) * 4)
8423       //    D2          1           2         20   (1 * (1 * 5) * 4)
8424       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8425       const Expr *StrideExpr = OASE->getStride();
8426       llvm::Value *Stride =
8427           StrideExpr
8428               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8429                                           CGF.Int64Ty, /*isSigned=*/false)
8430               : nullptr;
8431       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8432       if (Stride)
8433         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8434       else
8435         CurStrides.push_back(DimProd);
8436       if (DI != DimSizes.end())
8437         ++DI;
8438     }
8439 
8440     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8441     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8442     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8443   }
8444 
8445   /// Return the adjusted map modifiers if the declaration a capture refers to
8446   /// appears in a first-private clause. This is expected to be used only with
8447   /// directives that start with 'target'.
8448   MappableExprsHandler::OpenMPOffloadMappingFlags
8449   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8450     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8451 
8452     // A first private variable captured by reference will use only the
8453     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8454     // declaration is known as first-private in this handler.
8455     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8456       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8457         return MappableExprsHandler::OMP_MAP_TO |
8458                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8459       return MappableExprsHandler::OMP_MAP_PRIVATE |
8460              MappableExprsHandler::OMP_MAP_TO;
8461     }
8462     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8463     if (I != LambdasMap.end())
8464       // for map(to: lambda): using user specified map type.
8465       return getMapTypeBits(
8466           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8467           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8468           /*AddPtrFlag=*/false,
8469           /*AddIsTargetParamFlag=*/false,
8470           /*isNonContiguous=*/false);
8471     return MappableExprsHandler::OMP_MAP_TO |
8472            MappableExprsHandler::OMP_MAP_FROM;
8473   }
8474 
8475   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8476     // Rotate by getFlagMemberOffset() bits.
8477     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8478                                                   << getFlagMemberOffset());
8479   }
8480 
8481   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8482                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8483     // If the entry is PTR_AND_OBJ but has not been marked with the special
8484     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8485     // marked as MEMBER_OF.
8486     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8487         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8488       return;
8489 
8490     // Reset the placeholder value to prepare the flag for the assignment of the
8491     // proper MEMBER_OF value.
8492     Flags &= ~OMP_MAP_MEMBER_OF;
8493     Flags |= MemberOfFlag;
8494   }
8495 
8496   void getPlainLayout(const CXXRecordDecl *RD,
8497                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8498                       bool AsBase) const {
8499     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8500 
8501     llvm::StructType *St =
8502         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8503 
8504     unsigned NumElements = St->getNumElements();
8505     llvm::SmallVector<
8506         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8507         RecordLayout(NumElements);
8508 
8509     // Fill bases.
8510     for (const auto &I : RD->bases()) {
8511       if (I.isVirtual())
8512         continue;
8513       const auto *Base = I.getType()->getAsCXXRecordDecl();
8514       // Ignore empty bases.
8515       if (Base->isEmpty() || CGF.getContext()
8516                                  .getASTRecordLayout(Base)
8517                                  .getNonVirtualSize()
8518                                  .isZero())
8519         continue;
8520 
8521       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8522       RecordLayout[FieldIndex] = Base;
8523     }
8524     // Fill in virtual bases.
8525     for (const auto &I : RD->vbases()) {
8526       const auto *Base = I.getType()->getAsCXXRecordDecl();
8527       // Ignore empty bases.
8528       if (Base->isEmpty())
8529         continue;
8530       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8531       if (RecordLayout[FieldIndex])
8532         continue;
8533       RecordLayout[FieldIndex] = Base;
8534     }
8535     // Fill in all the fields.
8536     assert(!RD->isUnion() && "Unexpected union.");
8537     for (const auto *Field : RD->fields()) {
8538       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8539       // will fill in later.)
8540       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8541         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8542         RecordLayout[FieldIndex] = Field;
8543       }
8544     }
8545     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8546              &Data : RecordLayout) {
8547       if (Data.isNull())
8548         continue;
8549       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8550         getPlainLayout(Base, Layout, /*AsBase=*/true);
8551       else
8552         Layout.push_back(Data.get<const FieldDecl *>());
8553     }
8554   }
8555 
8556   /// Generate all the base pointers, section pointers, sizes, map types, and
8557   /// mappers for the extracted mappable expressions (all included in \a
8558   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8559   /// pair of the relevant declaration and index where it occurs is appended to
8560   /// the device pointers info array.
8561   void generateAllInfoForClauses(
8562       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8563       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8564           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8565     // We have to process the component lists that relate with the same
8566     // declaration in a single chunk so that we can generate the map flags
8567     // correctly. Therefore, we organize all lists in a map.
8568     enum MapKind { Present, Allocs, Other, Total };
8569     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8570                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8571         Info;
8572 
8573     // Helper function to fill the information map for the different supported
8574     // clauses.
8575     auto &&InfoGen =
8576         [&Info, &SkipVarSet](
8577             const ValueDecl *D, MapKind Kind,
8578             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8579             OpenMPMapClauseKind MapType,
8580             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8581             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8582             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8583             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8584           if (SkipVarSet.contains(D))
8585             return;
8586           auto It = Info.find(D);
8587           if (It == Info.end())
8588             It = Info
8589                      .insert(std::make_pair(
8590                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8591                      .first;
8592           It->second[Kind].emplace_back(
8593               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8594               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8595         };
8596 
8597     for (const auto *Cl : Clauses) {
8598       const auto *C = dyn_cast<OMPMapClause>(Cl);
8599       if (!C)
8600         continue;
8601       MapKind Kind = Other;
8602       if (llvm::is_contained(C->getMapTypeModifiers(),
8603                              OMPC_MAP_MODIFIER_present))
8604         Kind = Present;
8605       else if (C->getMapType() == OMPC_MAP_alloc)
8606         Kind = Allocs;
8607       const auto *EI = C->getVarRefs().begin();
8608       for (const auto L : C->component_lists()) {
8609         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8610         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8611                 C->getMapTypeModifiers(), llvm::None,
8612                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8613                 E);
8614         ++EI;
8615       }
8616     }
8617     for (const auto *Cl : Clauses) {
8618       const auto *C = dyn_cast<OMPToClause>(Cl);
8619       if (!C)
8620         continue;
8621       MapKind Kind = Other;
8622       if (llvm::is_contained(C->getMotionModifiers(),
8623                              OMPC_MOTION_MODIFIER_present))
8624         Kind = Present;
8625       const auto *EI = C->getVarRefs().begin();
8626       for (const auto L : C->component_lists()) {
8627         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8628                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8629                 C->isImplicit(), std::get<2>(L), *EI);
8630         ++EI;
8631       }
8632     }
8633     for (const auto *Cl : Clauses) {
8634       const auto *C = dyn_cast<OMPFromClause>(Cl);
8635       if (!C)
8636         continue;
8637       MapKind Kind = Other;
8638       if (llvm::is_contained(C->getMotionModifiers(),
8639                              OMPC_MOTION_MODIFIER_present))
8640         Kind = Present;
8641       const auto *EI = C->getVarRefs().begin();
8642       for (const auto L : C->component_lists()) {
8643         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8644                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8645                 C->isImplicit(), std::get<2>(L), *EI);
8646         ++EI;
8647       }
8648     }
8649 
8650     // Look at the use_device_ptr clause information and mark the existing map
8651     // entries as such. If there is no map information for an entry in the
8652     // use_device_ptr list, we create one with map type 'alloc' and zero size
8653     // section. It is the user fault if that was not mapped before. If there is
8654     // no map information and the pointer is a struct member, then we defer the
8655     // emission of that entry until the whole struct has been processed.
8656     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8657                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8658         DeferredInfo;
8659     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8660 
8661     for (const auto *Cl : Clauses) {
8662       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8663       if (!C)
8664         continue;
8665       for (const auto L : C->component_lists()) {
8666         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8667             std::get<1>(L);
8668         assert(!Components.empty() &&
8669                "Not expecting empty list of components!");
8670         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8671         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8672         const Expr *IE = Components.back().getAssociatedExpression();
8673         // If the first component is a member expression, we have to look into
8674         // 'this', which maps to null in the map of map information. Otherwise
8675         // look directly for the information.
8676         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8677 
8678         // We potentially have map information for this declaration already.
8679         // Look for the first set of components that refer to it.
8680         if (It != Info.end()) {
8681           bool Found = false;
8682           for (auto &Data : It->second) {
8683             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8684               return MI.Components.back().getAssociatedDeclaration() == VD;
8685             });
8686             // If we found a map entry, signal that the pointer has to be
8687             // returned and move on to the next declaration. Exclude cases where
8688             // the base pointer is mapped as array subscript, array section or
8689             // array shaping. The base address is passed as a pointer to base in
8690             // this case and cannot be used as a base for use_device_ptr list
8691             // item.
8692             if (CI != Data.end()) {
8693               auto PrevCI = std::next(CI->Components.rbegin());
8694               const auto *VarD = dyn_cast<VarDecl>(VD);
8695               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8696                   isa<MemberExpr>(IE) ||
8697                   !VD->getType().getNonReferenceType()->isPointerType() ||
8698                   PrevCI == CI->Components.rend() ||
8699                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8700                   VarD->hasLocalStorage()) {
8701                 CI->ReturnDevicePointer = true;
8702                 Found = true;
8703                 break;
8704               }
8705             }
8706           }
8707           if (Found)
8708             continue;
8709         }
8710 
8711         // We didn't find any match in our map information - generate a zero
8712         // size array section - if the pointer is a struct member we defer this
8713         // action until the whole struct has been processed.
8714         if (isa<MemberExpr>(IE)) {
8715           // Insert the pointer into Info to be processed by
8716           // generateInfoForComponentList. Because it is a member pointer
8717           // without a pointee, no entry will be generated for it, therefore
8718           // we need to generate one after the whole struct has been processed.
8719           // Nonetheless, generateInfoForComponentList must be called to take
8720           // the pointer into account for the calculation of the range of the
8721           // partial struct.
8722           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8723                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8724                   nullptr);
8725           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8726         } else {
8727           llvm::Value *Ptr =
8728               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8729           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8730           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8731           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8732           UseDevicePtrCombinedInfo.Sizes.push_back(
8733               llvm::Constant::getNullValue(CGF.Int64Ty));
8734           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8735           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8736         }
8737       }
8738     }
8739 
8740     // Look at the use_device_addr clause information and mark the existing map
8741     // entries as such. If there is no map information for an entry in the
8742     // use_device_addr list, we create one with map type 'alloc' and zero size
8743     // section. It is the user fault if that was not mapped before. If there is
8744     // no map information and the pointer is a struct member, then we defer the
8745     // emission of that entry until the whole struct has been processed.
8746     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8747     for (const auto *Cl : Clauses) {
8748       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8749       if (!C)
8750         continue;
8751       for (const auto L : C->component_lists()) {
8752         assert(!std::get<1>(L).empty() &&
8753                "Not expecting empty list of components!");
8754         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8755         if (!Processed.insert(VD).second)
8756           continue;
8757         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8758         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8759         // If the first component is a member expression, we have to look into
8760         // 'this', which maps to null in the map of map information. Otherwise
8761         // look directly for the information.
8762         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8763 
8764         // We potentially have map information for this declaration already.
8765         // Look for the first set of components that refer to it.
8766         if (It != Info.end()) {
8767           bool Found = false;
8768           for (auto &Data : It->second) {
8769             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8770               return MI.Components.back().getAssociatedDeclaration() == VD;
8771             });
8772             // If we found a map entry, signal that the pointer has to be
8773             // returned and move on to the next declaration.
8774             if (CI != Data.end()) {
8775               CI->ReturnDevicePointer = true;
8776               Found = true;
8777               break;
8778             }
8779           }
8780           if (Found)
8781             continue;
8782         }
8783 
8784         // We didn't find any match in our map information - generate a zero
8785         // size array section - if the pointer is a struct member we defer this
8786         // action until the whole struct has been processed.
8787         if (isa<MemberExpr>(IE)) {
8788           // Insert the pointer into Info to be processed by
8789           // generateInfoForComponentList. Because it is a member pointer
8790           // without a pointee, no entry will be generated for it, therefore
8791           // we need to generate one after the whole struct has been processed.
8792           // Nonetheless, generateInfoForComponentList must be called to take
8793           // the pointer into account for the calculation of the range of the
8794           // partial struct.
8795           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8796                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8797                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8798           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8799         } else {
8800           llvm::Value *Ptr;
8801           if (IE->isGLValue())
8802             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8803           else
8804             Ptr = CGF.EmitScalarExpr(IE);
8805           CombinedInfo.Exprs.push_back(VD);
8806           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8807           CombinedInfo.Pointers.push_back(Ptr);
8808           CombinedInfo.Sizes.push_back(
8809               llvm::Constant::getNullValue(CGF.Int64Ty));
8810           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8811           CombinedInfo.Mappers.push_back(nullptr);
8812         }
8813       }
8814     }
8815 
8816     for (const auto &Data : Info) {
8817       StructRangeInfoTy PartialStruct;
8818       // Temporary generated information.
8819       MapCombinedInfoTy CurInfo;
8820       const Decl *D = Data.first;
8821       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8822       for (const auto &M : Data.second) {
8823         for (const MapInfo &L : M) {
8824           assert(!L.Components.empty() &&
8825                  "Not expecting declaration with no component lists.");
8826 
8827           // Remember the current base pointer index.
8828           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8829           CurInfo.NonContigInfo.IsNonContiguous =
8830               L.Components.back().isNonContiguous();
8831           generateInfoForComponentList(
8832               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8833               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8834               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8835 
8836           // If this entry relates with a device pointer, set the relevant
8837           // declaration and add the 'return pointer' flag.
8838           if (L.ReturnDevicePointer) {
8839             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8840                    "Unexpected number of mapped base pointers.");
8841 
8842             const ValueDecl *RelevantVD =
8843                 L.Components.back().getAssociatedDeclaration();
8844             assert(RelevantVD &&
8845                    "No relevant declaration related with device pointer??");
8846 
8847             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8848                 RelevantVD);
8849             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8850           }
8851         }
8852       }
8853 
8854       // Append any pending zero-length pointers which are struct members and
8855       // used with use_device_ptr or use_device_addr.
8856       auto CI = DeferredInfo.find(Data.first);
8857       if (CI != DeferredInfo.end()) {
8858         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8859           llvm::Value *BasePtr;
8860           llvm::Value *Ptr;
8861           if (L.ForDeviceAddr) {
8862             if (L.IE->isGLValue())
8863               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8864             else
8865               Ptr = this->CGF.EmitScalarExpr(L.IE);
8866             BasePtr = Ptr;
8867             // Entry is RETURN_PARAM. Also, set the placeholder value
8868             // MEMBER_OF=FFFF so that the entry is later updated with the
8869             // correct value of MEMBER_OF.
8870             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8871           } else {
8872             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8873             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8874                                              L.IE->getExprLoc());
8875             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8876             // placeholder value MEMBER_OF=FFFF so that the entry is later
8877             // updated with the correct value of MEMBER_OF.
8878             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8879                                     OMP_MAP_MEMBER_OF);
8880           }
8881           CurInfo.Exprs.push_back(L.VD);
8882           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8883           CurInfo.Pointers.push_back(Ptr);
8884           CurInfo.Sizes.push_back(
8885               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8886           CurInfo.Mappers.push_back(nullptr);
8887         }
8888       }
8889       // If there is an entry in PartialStruct it means we have a struct with
8890       // individual members mapped. Emit an extra combined entry.
8891       if (PartialStruct.Base.isValid()) {
8892         CurInfo.NonContigInfo.Dims.push_back(0);
8893         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8894       }
8895 
8896       // We need to append the results of this capture to what we already
8897       // have.
8898       CombinedInfo.append(CurInfo);
8899     }
8900     // Append data for use_device_ptr clauses.
8901     CombinedInfo.append(UseDevicePtrCombinedInfo);
8902   }
8903 
8904 public:
8905   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8906       : CurDir(&Dir), CGF(CGF) {
8907     // Extract firstprivate clause information.
8908     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8909       for (const auto *D : C->varlists())
8910         FirstPrivateDecls.try_emplace(
8911             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8912     // Extract implicit firstprivates from uses_allocators clauses.
8913     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8914       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8915         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8916         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8917           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8918                                         /*Implicit=*/true);
8919         else if (const auto *VD = dyn_cast<VarDecl>(
8920                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8921                          ->getDecl()))
8922           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8923       }
8924     }
8925     // Extract device pointer clause information.
8926     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8927       for (auto L : C->component_lists())
8928         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8929     // Extract map information.
8930     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8931       if (C->getMapType() != OMPC_MAP_to)
8932         continue;
8933       for (auto L : C->component_lists()) {
8934         const ValueDecl *VD = std::get<0>(L);
8935         const auto *RD = VD ? VD->getType()
8936                                   .getCanonicalType()
8937                                   .getNonReferenceType()
8938                                   ->getAsCXXRecordDecl()
8939                             : nullptr;
8940         if (RD && RD->isLambda())
8941           LambdasMap.try_emplace(std::get<0>(L), C);
8942       }
8943     }
8944   }
8945 
  /// Constructor for the declare mapper directive. No per-clause state is
  /// precomputed here; the mapper's clauses are processed on demand (see
  /// generateAllInfoForMapper).
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8949 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Combined mapping information the new entry is
  ///        appended to.
  /// \param CurTypes Map flags of the entries already generated for the
  ///        individual members; adjusted in place.
  /// \param PartialStruct Base address and lowest/highest mapped elements of
  ///        the partially mapped struct.
  /// \param VD The mapped declaration, if any (recorded in the Exprs list).
  /// \param NotTargetParams If true, the combined entry is emitted with no
  ///        flags (OMP_MAP_NONE) rather than OMP_MAP_TARGET_PARAM.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // No combined entry is needed for a single entry that is not MEMBER_OF a
    // struct and does not come from an array section.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, use the record itself as both bounds;
    // the size computed below (one element past the low bound minus the low
    // bound) then equals the size of one record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
9021 
9022   /// Generate all the base pointers, section pointers, sizes, map types, and
9023   /// mappers for the extracted mappable expressions (all included in \a
9024   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9025   /// pair of the relevant declaration and index where it occurs is appended to
9026   /// the device pointers info array.
9027   void generateAllInfo(
9028       MapCombinedInfoTy &CombinedInfo,
9029       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9030           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9031     assert(CurDir.is<const OMPExecutableDirective *>() &&
9032            "Expect a executable directive");
9033     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9034     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9035   }
9036 
9037   /// Generate all the base pointers, section pointers, sizes, map types, and
9038   /// mappers for the extracted map clauses of user-defined mapper (all included
9039   /// in \a CombinedInfo).
9040   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9041     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9042            "Expect a declare mapper directive");
9043     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9044     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9045   }
9046 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \a VD is a lambda object (passed to the region through pointer
  /// \a Arg), emits one PTR_AND_OBJ entry for the captured 'this' (if any)
  /// and one entry per capture that is by-reference or a by-copy pointer.
  /// Each entry's base pointer is the address of the capture's field inside
  /// the lambda object. The field-address -> lambda-address pairs recorded in
  /// \a LambdaPointers are consumed later by adjustMemberOfForLambdaCaptures
  /// to fix up the MEMBER_OF indices of these entries.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do unless the captured declaration is a lambda object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr =
        Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Map each captured variable to its field in the lambda's closure type.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Captured 'this': base is the capture field's address, pointer is the
      // stored 'this' value, size is that of a void pointer.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: shadows the outer VD deliberately; the entries below describe
      // the captured variable, not the lambda itself.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and by-copy captures of pointer type are
      // mapped here.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: pointer is the referenced variable's address,
        // size is the size of the captured variable's (non-reference) type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: pointer is the stored pointer value and a
        // zero size is emitted.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9114 
9115   /// Set correct indices for lambdas captures.
9116   void adjustMemberOfForLambdaCaptures(
9117       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9118       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9119       MapFlagsArrayTy &Types) const {
9120     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9121       // Set correct member_of idx for all implicit lambda captures.
9122       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9123                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9124         continue;
9125       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9126       assert(BasePtr && "Unable to find base lambda address.");
9127       int TgtIdx = -1;
9128       for (unsigned J = I; J > 0; --J) {
9129         unsigned Idx = J - 1;
9130         if (Pointers[Idx] != BasePtr)
9131           continue;
9132         TgtIdx = Idx;
9133         break;
9134       }
9135       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9136       // All other current entries will be MEMBER_OF the combined entry
9137       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9138       // 0xFFFF in the MEMBER_OF field).
9139       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9140       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9141     }
9142   }
9143 
9144   /// Generate the base pointers, section pointers, sizes, map types, and
9145   /// mappers associated to a given capture (all included in \a CombinedInfo).
9146   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9147                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9148                               StructRangeInfoTy &PartialStruct) const {
9149     assert(!Cap->capturesVariableArrayType() &&
9150            "Not expecting to generate map info for a variable array type!");
9151 
9152     // We need to know when we generating information for the first component
9153     const ValueDecl *VD = Cap->capturesThis()
9154                               ? nullptr
9155                               : Cap->getCapturedVar()->getCanonicalDecl();
9156 
9157     // for map(to: lambda): skip here, processing it in
9158     // generateDefaultMapInfo
9159     if (LambdasMap.count(VD))
9160       return;
9161 
9162     // If this declaration appears in a is_device_ptr clause we just have to
9163     // pass the pointer by value. If it is a reference to a declaration, we just
9164     // pass its value.
9165     if (DevPointersMap.count(VD)) {
9166       CombinedInfo.Exprs.push_back(VD);
9167       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9168       CombinedInfo.Pointers.push_back(Arg);
9169       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9170           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9171           /*isSigned=*/true));
9172       CombinedInfo.Types.push_back(
9173           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9174           OMP_MAP_TARGET_PARAM);
9175       CombinedInfo.Mappers.push_back(nullptr);
9176       return;
9177     }
9178 
9179     using MapData =
9180         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9181                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9182                    const ValueDecl *, const Expr *>;
9183     SmallVector<MapData, 4> DeclComponentLists;
9184     assert(CurDir.is<const OMPExecutableDirective *>() &&
9185            "Expect a executable directive");
9186     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9187     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9188       const auto *EI = C->getVarRefs().begin();
9189       for (const auto L : C->decl_component_lists(VD)) {
9190         const ValueDecl *VDecl, *Mapper;
9191         // The Expression is not correct if the mapping is implicit
9192         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9193         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9194         std::tie(VDecl, Components, Mapper) = L;
9195         assert(VDecl == VD && "We got information for the wrong declaration??");
9196         assert(!Components.empty() &&
9197                "Not expecting declaration with no component lists.");
9198         DeclComponentLists.emplace_back(Components, C->getMapType(),
9199                                         C->getMapTypeModifiers(),
9200                                         C->isImplicit(), Mapper, E);
9201         ++EI;
9202       }
9203     }
9204     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9205                                              const MapData &RHS) {
9206       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9207       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9208       bool HasPresent =
9209           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9210       bool HasAllocs = MapType == OMPC_MAP_alloc;
9211       MapModifiers = std::get<2>(RHS);
9212       MapType = std::get<1>(LHS);
9213       bool HasPresentR =
9214           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9215       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9216       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9217     });
9218 
9219     // Find overlapping elements (including the offset from the base element).
9220     llvm::SmallDenseMap<
9221         const MapData *,
9222         llvm::SmallVector<
9223             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9224         4>
9225         OverlappedData;
9226     size_t Count = 0;
9227     for (const MapData &L : DeclComponentLists) {
9228       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9229       OpenMPMapClauseKind MapType;
9230       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9231       bool IsImplicit;
9232       const ValueDecl *Mapper;
9233       const Expr *VarRef;
9234       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9235           L;
9236       ++Count;
9237       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9238         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9239         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9240                  VarRef) = L1;
9241         auto CI = Components.rbegin();
9242         auto CE = Components.rend();
9243         auto SI = Components1.rbegin();
9244         auto SE = Components1.rend();
9245         for (; CI != CE && SI != SE; ++CI, ++SI) {
9246           if (CI->getAssociatedExpression()->getStmtClass() !=
9247               SI->getAssociatedExpression()->getStmtClass())
9248             break;
9249           // Are we dealing with different variables/fields?
9250           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9251             break;
9252         }
9253         // Found overlapping if, at least for one component, reached the head
9254         // of the components list.
9255         if (CI == CE || SI == SE) {
9256           // Ignore it if it is the same component.
9257           if (CI == CE && SI == SE)
9258             continue;
9259           const auto It = (SI == SE) ? CI : SI;
9260           // If one component is a pointer and another one is a kind of
9261           // dereference of this pointer (array subscript, section, dereference,
9262           // etc.), it is not an overlapping.
9263           // Same, if one component is a base and another component is a
9264           // dereferenced pointer memberexpr with the same base.
9265           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9266               (std::prev(It)->getAssociatedDeclaration() &&
9267                std::prev(It)
9268                    ->getAssociatedDeclaration()
9269                    ->getType()
9270                    ->isPointerType()) ||
9271               (It->getAssociatedDeclaration() &&
9272                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9273                std::next(It) != CE && std::next(It) != SE))
9274             continue;
9275           const MapData &BaseData = CI == CE ? L : L1;
9276           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9277               SI == SE ? Components : Components1;
9278           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9279           OverlappedElements.getSecond().push_back(SubData);
9280         }
9281       }
9282     }
9283     // Sort the overlapped elements for each item.
9284     llvm::SmallVector<const FieldDecl *, 4> Layout;
9285     if (!OverlappedData.empty()) {
9286       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9287       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9288       while (BaseType != OrigType) {
9289         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9290         OrigType = BaseType->getPointeeOrArrayElementType();
9291       }
9292 
9293       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9294         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9295       else {
9296         const auto *RD = BaseType->getAsRecordDecl();
9297         Layout.append(RD->field_begin(), RD->field_end());
9298       }
9299     }
9300     for (auto &Pair : OverlappedData) {
9301       llvm::stable_sort(
9302           Pair.getSecond(),
9303           [&Layout](
9304               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9305               OMPClauseMappableExprCommon::MappableExprComponentListRef
9306                   Second) {
9307             auto CI = First.rbegin();
9308             auto CE = First.rend();
9309             auto SI = Second.rbegin();
9310             auto SE = Second.rend();
9311             for (; CI != CE && SI != SE; ++CI, ++SI) {
9312               if (CI->getAssociatedExpression()->getStmtClass() !=
9313                   SI->getAssociatedExpression()->getStmtClass())
9314                 break;
9315               // Are we dealing with different variables/fields?
9316               if (CI->getAssociatedDeclaration() !=
9317                   SI->getAssociatedDeclaration())
9318                 break;
9319             }
9320 
9321             // Lists contain the same elements.
9322             if (CI == CE && SI == SE)
9323               return false;
9324 
9325             // List with less elements is less than list with more elements.
9326             if (CI == CE || SI == SE)
9327               return CI == CE;
9328 
9329             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9330             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9331             if (FD1->getParent() == FD2->getParent())
9332               return FD1->getFieldIndex() < FD2->getFieldIndex();
9333             const auto *It =
9334                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9335                   return FD == FD1 || FD == FD2;
9336                 });
9337             return *It == FD1;
9338           });
9339     }
9340 
9341     // Associated with a capture, because the mapping flags depend on it.
9342     // Go through all of the elements with the overlapped elements.
9343     bool IsFirstComponentList = true;
9344     for (const auto &Pair : OverlappedData) {
9345       const MapData &L = *Pair.getFirst();
9346       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9347       OpenMPMapClauseKind MapType;
9348       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9349       bool IsImplicit;
9350       const ValueDecl *Mapper;
9351       const Expr *VarRef;
9352       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9353           L;
9354       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9355           OverlappedComponents = Pair.getSecond();
9356       generateInfoForComponentList(
9357           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9358           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9359           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9360       IsFirstComponentList = false;
9361     }
9362     // Go through other elements without overlapped elements.
9363     for (const MapData &L : DeclComponentLists) {
9364       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9365       OpenMPMapClauseKind MapType;
9366       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9367       bool IsImplicit;
9368       const ValueDecl *Mapper;
9369       const Expr *VarRef;
9370       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9371           L;
9372       auto It = OverlappedData.find(&L);
9373       if (It == OverlappedData.end())
9374         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9375                                      Components, CombinedInfo, PartialStruct,
9376                                      IsFirstComponentList, IsImplicit, Mapper,
9377                                      /*ForDeviceAddr=*/false, VD, VarRef);
9378       IsFirstComponentList = false;
9379     }
9380   }
9381 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of the parallel arrays in
  /// \a CombinedInfo (Exprs, BasePointers, Pointers, Sizes, Types, Mappers);
  /// the entry describes how the capture is passed to the offloading runtime
  /// when no explicit map clause covers it.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map the pointee object to/from the device.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      // The record field for 'this' is a pointer; size is that of the
      // pointed-to class object.
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      // By-copy capture: the value itself is passed as the argument.
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // Firstprivate captures record whether the mapping was implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // By-reference capture: the field is a reference to the variable.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: pass the pointer value loaded through the
        // reference rather than the reference itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9454 };
9455 } // anonymous namespace
9456 
9457 static void emitNonContiguousDescriptor(
9458     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9459     CGOpenMPRuntime::TargetDataInfo &Info) {
9460   CodeGenModule &CGM = CGF.CGM;
9461   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9462       &NonContigInfo = CombinedInfo.NonContigInfo;
9463 
9464   // Build an array of struct descriptor_dim and then assign it to
9465   // offload_args.
9466   //
9467   // struct descriptor_dim {
9468   //  uint64_t offset;
9469   //  uint64_t count;
9470   //  uint64_t stride
9471   // };
9472   ASTContext &C = CGF.getContext();
9473   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9474   RecordDecl *RD;
9475   RD = C.buildImplicitRecord("descriptor_dim");
9476   RD->startDefinition();
9477   addFieldToRecordDecl(C, RD, Int64Ty);
9478   addFieldToRecordDecl(C, RD, Int64Ty);
9479   addFieldToRecordDecl(C, RD, Int64Ty);
9480   RD->completeDefinition();
9481   QualType DimTy = C.getRecordType(RD);
9482 
9483   enum { OffsetFD = 0, CountFD, StrideFD };
9484   // We need two index variable here since the size of "Dims" is the same as the
9485   // size of Components, however, the size of offset, count, and stride is equal
9486   // to the size of base declaration that is non-contiguous.
9487   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9488     // Skip emitting ir if dimension size is 1 since it cannot be
9489     // non-contiguous.
9490     if (NonContigInfo.Dims[I] == 1)
9491       continue;
9492     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9493     QualType ArrayTy =
9494         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9495     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9496     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9497       unsigned RevIdx = EE - II - 1;
9498       LValue DimsLVal = CGF.MakeAddrLValue(
9499           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9500       // Offset
9501       LValue OffsetLVal = CGF.EmitLValueForField(
9502           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9503       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9504       // Count
9505       LValue CountLVal = CGF.EmitLValueForField(
9506           DimsLVal, *std::next(RD->field_begin(), CountFD));
9507       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9508       // Stride
9509       LValue StrideLVal = CGF.EmitLValueForField(
9510           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9511       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9512     }
9513     // args[I] = &dims
9514     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9515         DimsAddr, CGM.Int8PtrTy);
9516     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9517         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9518         Info.PointersArray, 0, I);
9519     Address PAddr = Address::deprecated(P, CGF.getPointerAlign());
9520     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9521     ++L;
9522   }
9523 }
9524 
9525 // Try to extract the base declaration from a `this->x` expression if possible.
9526 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9527   if (!E)
9528     return nullptr;
9529 
9530   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9531     if (const MemberExpr *ME =
9532             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9533       return ME->getMemberDecl();
9534   return nullptr;
9535 }
9536 
9537 /// Emit a string constant containing the names of the values mapped to the
9538 /// offloading runtime library.
9539 llvm::Constant *
9540 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9541                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9542 
9543   uint32_t SrcLocStrSize;
9544   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9545     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9546 
9547   SourceLocation Loc;
9548   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9549     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9550       Loc = VD->getLocation();
9551     else
9552       Loc = MapExprs.getMapExpr()->getExprLoc();
9553   } else {
9554     Loc = MapExprs.getMapDecl()->getLocation();
9555   }
9556 
9557   std::string ExprName;
9558   if (MapExprs.getMapExpr()) {
9559     PrintingPolicy P(CGF.getContext().getLangOpts());
9560     llvm::raw_string_ostream OS(ExprName);
9561     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9562     OS.flush();
9563   } else {
9564     ExprName = MapExprs.getMapDecl()->getNameAsString();
9565   }
9566 
9567   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9568   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9569                                          PLoc.getLine(), PLoc.getColumn(),
9570                                          SrcLocStrSize);
9571 }
9572 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills Info's BasePointersArray/PointersArray/SizesArray/MappersArray (and
/// the map-type/map-name globals) from the parallel arrays in \p CombinedInfo.
/// When \p IsNonContiguous is set, additionally emits per-dimension
/// descriptors via emitNonContiguousDescriptor.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  // Nothing to emit when there are no mapped values at all.
  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries for the base pointer, pointer, and mapper arrays.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the "size" slot carries the number of
        // dimensions instead of a byte count.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One interned location string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Populate the per-entry slots of the runtime-evaluated arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr =
          Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr/addr declaration's address was stored.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr =
          Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes only need stores when at least one is not a constant.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr =
            Address::deprecated(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Non-contiguous descriptors are only needed when requested and present.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9750 
namespace {
/// Option bundle for emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// When true, emit the map-type array variant used for the end of a target
  /// data region.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) { ForEndCall = IsForEndCall; }
};
} // namespace
9759 
9760 /// Emit the arguments to be passed to the runtime library based on the
9761 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9762 /// ForEndCall, emit map types to be passed for the end of the region instead of
9763 /// the beginning.
9764 static void emitOffloadingArraysArgument(
9765     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9766     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9767     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9768     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9769     const ArgumentsOptions &Options = ArgumentsOptions()) {
9770   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9771          "expected region end call to runtime only when end call is separate");
9772   CodeGenModule &CGM = CGF.CGM;
9773   if (Info.NumberOfPtrs) {
9774     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9775         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9776         Info.BasePointersArray,
9777         /*Idx0=*/0, /*Idx1=*/0);
9778     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9779         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9780         Info.PointersArray,
9781         /*Idx0=*/0,
9782         /*Idx1=*/0);
9783     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9784         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9785         /*Idx0=*/0, /*Idx1=*/0);
9786     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9787         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9788         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9789                                                     : Info.MapTypesArray,
9790         /*Idx0=*/0,
9791         /*Idx1=*/0);
9792 
9793     // Only emit the mapper information arrays if debug information is
9794     // requested.
9795     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9796       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9797     else
9798       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9799           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9800           Info.MapNamesArray,
9801           /*Idx0=*/0,
9802           /*Idx1=*/0);
9803     // If there is no user-defined mapper, set the mapper array to nullptr to
9804     // avoid an unnecessary data privatization
9805     if (!Info.HasMapper)
9806       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9807     else
9808       MappersArrayArg =
9809           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9810   } else {
9811     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9812     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9813     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9814     MapTypesArrayArg =
9815         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9816     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9817     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9818   }
9819 }
9820 
/// Check for inner distribute directive.
///
/// Looks through the innermost captured statement of \p D for a nested
/// OpenMP directive and returns it when it is a 'distribute' directive
/// (directly nested, or nested inside a 'teams' region for plain 'target').
/// Returns nullptr when no such directive is found.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may have either 'distribute' directly nested, or a 'teams'
      // region that itself contains a 'distribute'.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot contain a nested 'distribute'.
      return nullptr;
    // Any other directive kind is not expected here; the caller only passes
    // target-entry directives without an embedded 'distribute'.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9930 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D The 'omp declare mapper' declaration to generate a function for.
/// \param CGF If non-null, the function whose codegen required this mapper;
///        \p D is then also recorded in FunctionUDMMap against CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper is emitted at most once; UDMMap caches the emitted function.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below so that the mapper body sees the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The parameter order
  // matches the runtime signature shown in the \code block above.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Build the mapper's symbol name from the mangled mapped type and the
  // mapper's declared name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even in otherwise -O0 builds.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that ultimately branches back to the loop header;
  // it is updated below as the per-element map-type CFG is emitted.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address::deprecated(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                                      .getAlignment()
                                      .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the pre-existing component count into the MEMBER_OF bit-field
  // position so it can be folded into each member's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Join the four paths: alloc/to/from cleared bits, and tofrom (falling
    // through from ToElseBB) which keeps MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function and, if requested, remember that \p CGF's
  // current function triggered emission of this mapper.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10211 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// \param MapperCGF The CodeGenFunction emitting the mapper body.
/// \param Handle The runtime mapper handle (first mapper argument).
/// \param Base Input base pointer of the mapped section.
/// \param Begin Input begin pointer of the mapped section.
/// \param Size Number of array elements (already divided by element size).
/// \param MapType The 64-bit map-type flags of the whole section.
/// \param MapName Optional mapping-name value for debug info.
/// \param ElementSize Size in chars of one array element.
/// \param ExitBB Block to branch to when no init/delete work is needed.
/// \param IsInit True to emit the allocation path, false for deletion.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // DeleteBit is nonzero iff OMP_MAP_DELETE is set in \p MapType.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Init path: run when mapping an array section (size > 1) OR a
    // pointer-with-object whose base differs from begin, and deletion is
    // NOT requested.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Delete path: run only for array sections when deletion IS requested.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10279 
10280 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10281     const OMPDeclareMapperDecl *D) {
10282   auto I = UDMMap.find(D);
10283   if (I != UDMMap.end())
10284     return I->second;
10285   emitUserDefinedMapper(D);
10286   return UDMMap.lookup(D);
10287 }
10288 
/// Emit a call to __kmpc_push_target_tripcount_mapper that passes the trip
/// count of the (possibly nested) teams-distribute loop directive to the
/// runtime ahead of the target launch. Emits nothing when no such loop
/// directive is found, or when \p SizeEmitter yields no value for it.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. If \p D is not itself
  // a combined teams+distribute directive, search its body for one.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  // Compute the iteration count and push it to the runtime; skipped entirely
  // when SizeEmitter cannot produce a value for this loop directive.
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
10316 
10317 void CGOpenMPRuntime::emitTargetCall(
10318     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10319     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10320     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10321     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10322                                      const OMPLoopDirective &D)>
10323         SizeEmitter) {
10324   if (!CGF.HaveInsertPoint())
10325     return;
10326 
10327   assert(OutlinedFn && "Invalid outlined function!");
10328 
10329   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10330                                  D.hasClausesOfKind<OMPNowaitClause>();
10331   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10332   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10333   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10334                                             PrePostActionTy &) {
10335     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10336   };
10337   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10338 
10339   CodeGenFunction::OMPTargetDataInfo InputInfo;
10340   llvm::Value *MapTypesArray = nullptr;
10341   llvm::Value *MapNamesArray = nullptr;
10342   // Fill up the pointer arrays and transfer execution to the device.
10343   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10344                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10345                     &CapturedVars,
10346                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10347     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10348       // Reverse offloading is not supported, so just execute on the host.
10349       if (RequiresOuterTask) {
10350         CapturedVars.clear();
10351         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10352       }
10353       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10354       return;
10355     }
10356 
10357     // On top of the arrays that were filled up, the target offloading call
10358     // takes as arguments the device id as well as the host pointer. The host
10359     // pointer is used by the runtime library to identify the current target
10360     // region, so it only has to be unique and not necessarily point to
10361     // anything. It could be the pointer to the outlined function that
10362     // implements the target region, but we aren't using that so that the
10363     // compiler doesn't need to keep that, and could therefore inline the host
10364     // function if proven worthwhile during optimization.
10365 
10366     // From this point on, we need to have an ID of the target region defined.
10367     assert(OutlinedFnID && "Invalid outlined function ID!");
10368 
10369     // Emit device ID if any.
10370     llvm::Value *DeviceID;
10371     if (Device.getPointer()) {
10372       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10373               Device.getInt() == OMPC_DEVICE_device_num) &&
10374              "Expected device_num modifier.");
10375       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10376       DeviceID =
10377           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10378     } else {
10379       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10380     }
10381 
10382     // Emit the number of elements in the offloading arrays.
10383     llvm::Value *PointerNum =
10384         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10385 
10386     // Return value of the runtime offloading call.
10387     llvm::Value *Return;
10388 
10389     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10390     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10391 
10392     // Source location for the ident struct
10393     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10394 
10395     // Emit tripcount for the target loop-based directive.
10396     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10397 
10398     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10399     // The target region is an outlined function launched by the runtime
10400     // via calls __tgt_target() or __tgt_target_teams().
10401     //
10402     // __tgt_target() launches a target region with one team and one thread,
10403     // executing a serial region.  This master thread may in turn launch
10404     // more threads within its team upon encountering a parallel region,
10405     // however, no additional teams can be launched on the device.
10406     //
10407     // __tgt_target_teams() launches a target region with one or more teams,
10408     // each with one or more threads.  This call is required for target
10409     // constructs such as:
10410     //  'target teams'
10411     //  'target' / 'teams'
10412     //  'target teams distribute parallel for'
10413     //  'target parallel'
10414     // and so on.
10415     //
10416     // Note that on the host and CPU targets, the runtime implementation of
10417     // these calls simply call the outlined function without forking threads.
10418     // The outlined functions themselves have runtime calls to
10419     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10420     // the compiler in emitTeamsCall() and emitParallelCall().
10421     //
10422     // In contrast, on the NVPTX target, the implementation of
10423     // __tgt_target_teams() launches a GPU kernel with the requested number
10424     // of teams and threads so no additional calls to the runtime are required.
10425     if (NumTeams) {
10426       // If we have NumTeams defined this means that we have an enclosed teams
10427       // region. Therefore we also expect to have NumThreads defined. These two
10428       // values should be defined in the presence of a teams directive,
10429       // regardless of having any clauses associated. If the user is using teams
10430       // but no clauses, these two values will be the default that should be
10431       // passed to the runtime library - a 32-bit integer with the value zero.
10432       assert(NumThreads && "Thread limit expression should be available along "
10433                            "with number of teams.");
10434       SmallVector<llvm::Value *> OffloadingArgs = {
10435           RTLoc,
10436           DeviceID,
10437           OutlinedFnID,
10438           PointerNum,
10439           InputInfo.BasePointersArray.getPointer(),
10440           InputInfo.PointersArray.getPointer(),
10441           InputInfo.SizesArray.getPointer(),
10442           MapTypesArray,
10443           MapNamesArray,
10444           InputInfo.MappersArray.getPointer(),
10445           NumTeams,
10446           NumThreads};
10447       if (HasNowait) {
10448         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10449         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10450         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10451         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10452         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10453         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10454       }
10455       Return = CGF.EmitRuntimeCall(
10456           OMPBuilder.getOrCreateRuntimeFunction(
10457               CGM.getModule(), HasNowait
10458                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10459                                    : OMPRTL___tgt_target_teams_mapper),
10460           OffloadingArgs);
10461     } else {
10462       SmallVector<llvm::Value *> OffloadingArgs = {
10463           RTLoc,
10464           DeviceID,
10465           OutlinedFnID,
10466           PointerNum,
10467           InputInfo.BasePointersArray.getPointer(),
10468           InputInfo.PointersArray.getPointer(),
10469           InputInfo.SizesArray.getPointer(),
10470           MapTypesArray,
10471           MapNamesArray,
10472           InputInfo.MappersArray.getPointer()};
10473       if (HasNowait) {
10474         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10475         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10476         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10477         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10478         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10479         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10480       }
10481       Return = CGF.EmitRuntimeCall(
10482           OMPBuilder.getOrCreateRuntimeFunction(
10483               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10484                                          : OMPRTL___tgt_target_mapper),
10485           OffloadingArgs);
10486     }
10487 
10488     // Check the error code and execute the host version if required.
10489     llvm::BasicBlock *OffloadFailedBlock =
10490         CGF.createBasicBlock("omp_offload.failed");
10491     llvm::BasicBlock *OffloadContBlock =
10492         CGF.createBasicBlock("omp_offload.cont");
10493     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10494     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10495 
10496     CGF.EmitBlock(OffloadFailedBlock);
10497     if (RequiresOuterTask) {
10498       CapturedVars.clear();
10499       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10500     }
10501     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10502     CGF.EmitBranch(OffloadContBlock);
10503 
10504     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10505   };
10506 
10507   // Notify that the host version must be executed.
10508   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10509                     RequiresOuterTask](CodeGenFunction &CGF,
10510                                        PrePostActionTy &) {
10511     if (RequiresOuterTask) {
10512       CapturedVars.clear();
10513       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10514     }
10515     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10516   };
10517 
10518   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10519                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10520                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10521     // Fill up the arrays with all the captured variables.
10522     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10523 
10524     // Get mappable expression information.
10525     MappableExprsHandler MEHandler(D, CGF);
10526     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10527     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10528 
10529     auto RI = CS.getCapturedRecordDecl()->field_begin();
10530     auto *CV = CapturedVars.begin();
10531     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10532                                               CE = CS.capture_end();
10533          CI != CE; ++CI, ++RI, ++CV) {
10534       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10535       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10536 
10537       // VLA sizes are passed to the outlined region by copy and do not have map
10538       // information associated.
10539       if (CI->capturesVariableArrayType()) {
10540         CurInfo.Exprs.push_back(nullptr);
10541         CurInfo.BasePointers.push_back(*CV);
10542         CurInfo.Pointers.push_back(*CV);
10543         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10544             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10545         // Copy to the device as an argument. No need to retrieve it.
10546         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10547                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10548                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10549         CurInfo.Mappers.push_back(nullptr);
10550       } else {
10551         // If we have any information in the map clause, we use it, otherwise we
10552         // just do a default mapping.
10553         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10554         if (!CI->capturesThis())
10555           MappedVarSet.insert(CI->getCapturedVar());
10556         else
10557           MappedVarSet.insert(nullptr);
10558         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10559           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10560         // Generate correct mapping for variables captured by reference in
10561         // lambdas.
10562         if (CI->capturesVariable())
10563           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10564                                                   CurInfo, LambdaPointers);
10565       }
10566       // We expect to have at least an element of information for this capture.
10567       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10568              "Non-existing map pointer for capture!");
10569       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10570              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10571              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10572              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10573              "Inconsistent map information sizes!");
10574 
10575       // If there is an entry in PartialStruct it means we have a struct with
10576       // individual members mapped. Emit an extra combined entry.
10577       if (PartialStruct.Base.isValid()) {
10578         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10579         MEHandler.emitCombinedEntry(
10580             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10581             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10582       }
10583 
10584       // We need to append the results of this capture to what we already have.
10585       CombinedInfo.append(CurInfo);
10586     }
10587     // Adjust MEMBER_OF flags for the lambdas captures.
10588     MEHandler.adjustMemberOfForLambdaCaptures(
10589         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10590         CombinedInfo.Types);
10591     // Map any list items in a map clause that were not captures because they
10592     // weren't referenced within the construct.
10593     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10594 
10595     TargetDataInfo Info;
10596     // Fill up the arrays and create the arguments.
10597     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10598     emitOffloadingArraysArgument(
10599         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10600         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10601         {/*ForEndCall=*/false});
10602 
10603     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10604     InputInfo.BasePointersArray =
10605         Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
10606     InputInfo.PointersArray =
10607         Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
10608     InputInfo.SizesArray =
10609         Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
10610     InputInfo.MappersArray =
10611         Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
10612     MapTypesArray = Info.MapTypesArray;
10613     MapNamesArray = Info.MapNamesArray;
10614     if (RequiresOuterTask)
10615       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10616     else
10617       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10618   };
10619 
10620   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10621                              CodeGenFunction &CGF, PrePostActionTy &) {
10622     if (RequiresOuterTask) {
10623       CodeGenFunction::OMPTargetDataInfo InputInfo;
10624       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10625     } else {
10626       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10627     }
10628   };
10629 
10630   // If we have a target function ID it means that we need to support
10631   // offloading, otherwise, just execute on the host. We need to execute on host
10632   // regardless of the conditional in the if clause if, e.g., the user do not
10633   // specify target triples.
10634   if (OutlinedFnID) {
10635     if (IfCond) {
10636       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10637     } else {
10638       RegionCodeGenTy ThenRCG(TargetThenGen);
10639       ThenRCG(CGF);
10640     }
10641   } else {
10642     RegionCodeGenTy ElseRCG(TargetElseGen);
10643     ElseRCG(CGF);
10644   }
10645 }
10646 
// Recursively scan the statement tree rooted at \p S and emit a device
// function for every OpenMP target execution directive found. \p ParentName
// (the mangled name of the enclosing host function) is used to build the
// unique offload-entry name for each target region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple plus ParentName uniquely identifies
    // this target region across host and device compilations.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter. Every
    // target execution directive must have a case here; anything else is a
    // front-end invariant violation.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // therefore cannot reach here (RequiresDeviceCodegen filtered them out).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directives: only recurse into the raw associated
  // statement (the captured body), not the synthesized helper expressions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10797 
10798 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10799   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10800       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10801   if (!DevTy)
10802     return false;
10803   // Do not emit device_type(nohost) functions for the host.
10804   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10805     return true;
10806   // Do not emit device_type(host) functions for the device.
10807   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10808     return true;
10809   return false;
10810 }
10811 
10812 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10813   // If emitting code for the host, we do not process FD here. Instead we do
10814   // the normal code generation.
10815   if (!CGM.getLangOpts().OpenMPIsDevice) {
10816     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10817       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10818                                   CGM.getLangOpts().OpenMPIsDevice))
10819         return true;
10820     return false;
10821   }
10822 
10823   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10824   // Try to detect target regions in the function.
10825   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10826     StringRef Name = CGM.getMangledName(GD);
10827     scanForTargetRegionsFunctions(FD->getBody(), Name);
10828     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10829                                 CGM.getLangOpts().OpenMPIsDevice))
10830       return true;
10831   }
10832 
10833   // Do not to emit function if it is not marked as declare target.
10834   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10835          AlreadyEmittedTargetDecls.count(VD) == 0;
10836 }
10837 
// Returns true if the global variable \p GD must NOT be emitted through the
// normal codegen path (it is excluded by device_type, or its emission is
// deferred until the whole declare-target picture is known).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Variables excluded by a device_type clause are never emitted on this
  // side of the compilation.
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // On the host there is nothing more to decide here.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  // 'link' variables, and 'to' variables under requires unified_shared_memory,
  // are handled later via emitDeferredTargetDecls().
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10875 
10876 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10877                                                    llvm::Constant *Addr) {
10878   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10879       !CGM.getLangOpts().OpenMPIsDevice)
10880     return;
10881 
10882   // If we have host/nohost variables, they do not need to be registered.
10883   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10884       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10885   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10886     return;
10887 
10888   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10889       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10890   if (!Res) {
10891     if (CGM.getLangOpts().OpenMPIsDevice) {
10892       // Register non-target variables being emitted in device code (debug info
10893       // may cause this).
10894       StringRef VarName = CGM.getMangledName(VD);
10895       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10896     }
10897     return;
10898   }
10899   // Register declare target variables.
10900   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10901   StringRef VarName;
10902   CharUnits VarSize;
10903   llvm::GlobalValue::LinkageTypes Linkage;
10904 
10905   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10906       !HasRequiresUnifiedSharedMemory) {
10907     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10908     VarName = CGM.getMangledName(VD);
10909     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10910       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10911       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10912     } else {
10913       VarSize = CharUnits::Zero();
10914     }
10915     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10916     // Temp solution to prevent optimizations of the internal variables.
10917     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10918       // Do not create a "ref-variable" if the original is not also available
10919       // on the host.
10920       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10921         return;
10922       std::string RefName = getName({VarName, "ref"});
10923       if (!CGM.GetGlobalValue(RefName)) {
10924         llvm::Constant *AddrRef =
10925             getOrCreateInternalVariable(Addr->getType(), RefName);
10926         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10927         GVAddrRef->setConstant(/*Val=*/true);
10928         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10929         GVAddrRef->setInitializer(Addr);
10930         CGM.addCompilerUsedGlobal(GVAddrRef);
10931       }
10932     }
10933   } else {
10934     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10935             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10936              HasRequiresUnifiedSharedMemory)) &&
10937            "Declare target attribute must link or to with unified memory.");
10938     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10939       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10940     else
10941       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10942 
10943     if (CGM.getLangOpts().OpenMPIsDevice) {
10944       VarName = Addr->getName();
10945       Addr = nullptr;
10946     } else {
10947       VarName = getAddrOfDeclareTargetVar(VD).getName();
10948       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10949     }
10950     VarSize = CGM.getPointerSize();
10951     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10952   }
10953 
10954   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10955       VarName, Addr, VarSize, Flags, Linkage);
10956 }
10957 
10958 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10959   if (isa<FunctionDecl>(GD.getDecl()) ||
10960       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10961     return emitTargetFunctions(GD);
10962 
10963   return emitTargetGlobalVariable(GD);
10964 }
10965 
10966 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10967   for (const VarDecl *VD : DeferredGlobalVariables) {
10968     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10969         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10970     if (!Res)
10971       continue;
10972     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10973         !HasRequiresUnifiedSharedMemory) {
10974       CGM.EmitGlobal(VD);
10975     } else {
10976       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10977               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10978                HasRequiresUnifiedSharedMemory)) &&
10979              "Expected link clause or to clause with unified memory.");
10980       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10981     }
10982   }
10983 }
10984 
10985 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10986     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10987   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10988          " Expected target-based directive.");
10989 }
10990 
10991 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10992   for (const OMPClause *Clause : D->clauselists()) {
10993     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10994       HasRequiresUnifiedSharedMemory = true;
10995     } else if (const auto *AC =
10996                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10997       switch (AC->getAtomicDefaultMemOrderKind()) {
10998       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10999         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11000         break;
11001       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11002         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11003         break;
11004       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11005         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11006         break;
11007       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11008         break;
11009       }
11010     }
11011   }
11012 }
11013 
// Returns the atomic ordering recorded from an 'atomic_default_mem_order'
// requires clause by processRequiresDirective() (or the member's initial
// value if no such clause was seen).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11017 
11018 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11019                                                        LangAS &AS) {
11020   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11021     return false;
11022   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11023   switch(A->getAllocatorType()) {
11024   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11025   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11026   // Not supported, fallback to the default mem space.
11027   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11028   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11029   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11030   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11031   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11032   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11033   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11034     AS = LangAS::Default;
11035     return true;
11036   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11037     llvm_unreachable("Expected predefined allocator for the variables with the "
11038                      "static storage.");
11039   }
11040   return false;
11041 }
11042 
// Returns true if a 'requires unified_shared_memory' clause was recorded by
// processRequiresDirective().
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11046 
11047 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11048     CodeGenModule &CGM)
11049     : CGM(CGM) {
11050   if (CGM.getLangOpts().OpenMPIsDevice) {
11051     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11052     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11053   }
11054 }
11055 
11056 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11057   if (CGM.getLangOpts().OpenMPIsDevice)
11058     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11059 }
11060 
// Decides whether the function \p GD was already handled for device
// emission. Returns true when it must NOT be emitted (again); as a side
// effect, records first-time non-declare-target functions in
// AlreadyEmittedTargetDecls.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // On the host, or while auto marking is disabled (see
  // DisableAutoDeclareTargetRAII), this path never triggers emission.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // If a real definition already exists in the module, it is done;
      // a mere declaration means emission is still pending.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First sighting: insert() returns true in .second, so we return false
  // (allow emission); subsequent calls return true (already handled).
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
11080 
// Creates the global initializer function that registers the 'omp requires'
// flags with the offload runtime via __tgt_register_requires. Returns
// nullptr when no registration is needed (device compilation, simd-only
// mode, no target triples, or no target regions/entries were emitted).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit: __tgt_register_requires(Flags) into the registration function.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11122 
11123 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11124                                     const OMPExecutableDirective &D,
11125                                     SourceLocation Loc,
11126                                     llvm::Function *OutlinedFn,
11127                                     ArrayRef<llvm::Value *> CapturedVars) {
11128   if (!CGF.HaveInsertPoint())
11129     return;
11130 
11131   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11132   CodeGenFunction::RunCleanupsScope Scope(CGF);
11133 
11134   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11135   llvm::Value *Args[] = {
11136       RTLoc,
11137       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11138       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11139   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11140   RealArgs.append(std::begin(Args), std::end(Args));
11141   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11142 
11143   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11144       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11145   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11146 }
11147 
// Emits __kmpc_push_num_teams(loc, tid, num_teams, thread_limit) for the
// 'num_teams'/'thread_limit' clauses. A null expression yields 0, which the
// runtime interprets as "use the default".
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate num_teams if present; 0 means "not specified".
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Evaluate thread_limit if present; 0 means "not specified".
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
11176 
11177 void CGOpenMPRuntime::emitTargetDataCalls(
11178     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11179     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11180   if (!CGF.HaveInsertPoint())
11181     return;
11182 
11183   // Action used to replace the default codegen action and turn privatization
11184   // off.
11185   PrePostActionTy NoPrivAction;
11186 
11187   // Generate the code for the opening of the data environment. Capture all the
11188   // arguments of the runtime call by reference because they are used in the
11189   // closing of the region.
11190   auto &&BeginThenGen = [this, &D, Device, &Info,
11191                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11192     // Fill up the arrays with all the mapped variables.
11193     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11194 
11195     // Get map clause information.
11196     MappableExprsHandler MEHandler(D, CGF);
11197     MEHandler.generateAllInfo(CombinedInfo);
11198 
11199     // Fill up the arrays and create the arguments.
11200     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11201                          /*IsNonContiguous=*/true);
11202 
11203     llvm::Value *BasePointersArrayArg = nullptr;
11204     llvm::Value *PointersArrayArg = nullptr;
11205     llvm::Value *SizesArrayArg = nullptr;
11206     llvm::Value *MapTypesArrayArg = nullptr;
11207     llvm::Value *MapNamesArrayArg = nullptr;
11208     llvm::Value *MappersArrayArg = nullptr;
11209     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11210                                  SizesArrayArg, MapTypesArrayArg,
11211                                  MapNamesArrayArg, MappersArrayArg, Info);
11212 
11213     // Emit device ID if any.
11214     llvm::Value *DeviceID = nullptr;
11215     if (Device) {
11216       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11217                                            CGF.Int64Ty, /*isSigned=*/true);
11218     } else {
11219       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11220     }
11221 
11222     // Emit the number of elements in the offloading arrays.
11223     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11224     //
11225     // Source location for the ident struct
11226     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11227 
11228     llvm::Value *OffloadingArgs[] = {RTLoc,
11229                                      DeviceID,
11230                                      PointerNum,
11231                                      BasePointersArrayArg,
11232                                      PointersArrayArg,
11233                                      SizesArrayArg,
11234                                      MapTypesArrayArg,
11235                                      MapNamesArrayArg,
11236                                      MappersArrayArg};
11237     CGF.EmitRuntimeCall(
11238         OMPBuilder.getOrCreateRuntimeFunction(
11239             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11240         OffloadingArgs);
11241 
11242     // If device pointer privatization is required, emit the body of the region
11243     // here. It will have to be duplicated: with and without privatization.
11244     if (!Info.CaptureDeviceAddrMap.empty())
11245       CodeGen(CGF);
11246   };
11247 
11248   // Generate code for the closing of the data region.
11249   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11250                                                 PrePostActionTy &) {
11251     assert(Info.isValid() && "Invalid data environment closing arguments.");
11252 
11253     llvm::Value *BasePointersArrayArg = nullptr;
11254     llvm::Value *PointersArrayArg = nullptr;
11255     llvm::Value *SizesArrayArg = nullptr;
11256     llvm::Value *MapTypesArrayArg = nullptr;
11257     llvm::Value *MapNamesArrayArg = nullptr;
11258     llvm::Value *MappersArrayArg = nullptr;
11259     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11260                                  SizesArrayArg, MapTypesArrayArg,
11261                                  MapNamesArrayArg, MappersArrayArg, Info,
11262                                  {/*ForEndCall=*/true});
11263 
11264     // Emit device ID if any.
11265     llvm::Value *DeviceID = nullptr;
11266     if (Device) {
11267       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11268                                            CGF.Int64Ty, /*isSigned=*/true);
11269     } else {
11270       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11271     }
11272 
11273     // Emit the number of elements in the offloading arrays.
11274     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11275 
11276     // Source location for the ident struct
11277     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11278 
11279     llvm::Value *OffloadingArgs[] = {RTLoc,
11280                                      DeviceID,
11281                                      PointerNum,
11282                                      BasePointersArrayArg,
11283                                      PointersArrayArg,
11284                                      SizesArrayArg,
11285                                      MapTypesArrayArg,
11286                                      MapNamesArrayArg,
11287                                      MappersArrayArg};
11288     CGF.EmitRuntimeCall(
11289         OMPBuilder.getOrCreateRuntimeFunction(
11290             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11291         OffloadingArgs);
11292   };
11293 
11294   // If we need device pointer privatization, we need to emit the body of the
11295   // region with no privatization in the 'else' branch of the conditional.
11296   // Otherwise, we don't have to do anything.
11297   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11298                                                          PrePostActionTy &) {
11299     if (!Info.CaptureDeviceAddrMap.empty()) {
11300       CodeGen.setAction(NoPrivAction);
11301       CodeGen(CGF);
11302     }
11303   };
11304 
11305   // We don't have to do anything to close the region if the if clause evaluates
11306   // to false.
11307   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11308 
11309   if (IfCond) {
11310     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11311   } else {
11312     RegionCodeGenTy RCG(BeginThenGen);
11313     RCG(CGF);
11314   }
11315 
11316   // If we don't require privatization of device pointers, we emit the body in
11317   // between the runtime calls. This avoids duplicating the body code.
11318   if (Info.CaptureDeviceAddrMap.empty()) {
11319     CodeGen.setAction(NoPrivAction);
11320     CodeGen(CGF);
11321   }
11322 
11323   if (IfCond) {
11324     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11325   } else {
11326     RegionCodeGenTy RCG(EndThenGen);
11327     RCG(CGF);
11328   }
11329 }
11330 
11331 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11332     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11333     const Expr *Device) {
11334   if (!CGF.HaveInsertPoint())
11335     return;
11336 
11337   assert((isa<OMPTargetEnterDataDirective>(D) ||
11338           isa<OMPTargetExitDataDirective>(D) ||
11339           isa<OMPTargetUpdateDirective>(D)) &&
11340          "Expecting either target enter, exit data, or update directives.");
11341 
11342   CodeGenFunction::OMPTargetDataInfo InputInfo;
11343   llvm::Value *MapTypesArray = nullptr;
11344   llvm::Value *MapNamesArray = nullptr;
11345   // Generate the code for the opening of the data environment.
11346   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11347                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11348     // Emit device ID if any.
11349     llvm::Value *DeviceID = nullptr;
11350     if (Device) {
11351       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11352                                            CGF.Int64Ty, /*isSigned=*/true);
11353     } else {
11354       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11355     }
11356 
11357     // Emit the number of elements in the offloading arrays.
11358     llvm::Constant *PointerNum =
11359         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11360 
11361     // Source location for the ident struct
11362     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11363 
11364     llvm::Value *OffloadingArgs[] = {RTLoc,
11365                                      DeviceID,
11366                                      PointerNum,
11367                                      InputInfo.BasePointersArray.getPointer(),
11368                                      InputInfo.PointersArray.getPointer(),
11369                                      InputInfo.SizesArray.getPointer(),
11370                                      MapTypesArray,
11371                                      MapNamesArray,
11372                                      InputInfo.MappersArray.getPointer()};
11373 
11374     // Select the right runtime function call for each standalone
11375     // directive.
11376     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11377     RuntimeFunction RTLFn;
11378     switch (D.getDirectiveKind()) {
11379     case OMPD_target_enter_data:
11380       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11381                         : OMPRTL___tgt_target_data_begin_mapper;
11382       break;
11383     case OMPD_target_exit_data:
11384       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11385                         : OMPRTL___tgt_target_data_end_mapper;
11386       break;
11387     case OMPD_target_update:
11388       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11389                         : OMPRTL___tgt_target_data_update_mapper;
11390       break;
11391     case OMPD_parallel:
11392     case OMPD_for:
11393     case OMPD_parallel_for:
11394     case OMPD_parallel_master:
11395     case OMPD_parallel_sections:
11396     case OMPD_for_simd:
11397     case OMPD_parallel_for_simd:
11398     case OMPD_cancel:
11399     case OMPD_cancellation_point:
11400     case OMPD_ordered:
11401     case OMPD_threadprivate:
11402     case OMPD_allocate:
11403     case OMPD_task:
11404     case OMPD_simd:
11405     case OMPD_tile:
11406     case OMPD_unroll:
11407     case OMPD_sections:
11408     case OMPD_section:
11409     case OMPD_single:
11410     case OMPD_master:
11411     case OMPD_critical:
11412     case OMPD_taskyield:
11413     case OMPD_barrier:
11414     case OMPD_taskwait:
11415     case OMPD_taskgroup:
11416     case OMPD_atomic:
11417     case OMPD_flush:
11418     case OMPD_depobj:
11419     case OMPD_scan:
11420     case OMPD_teams:
11421     case OMPD_target_data:
11422     case OMPD_distribute:
11423     case OMPD_distribute_simd:
11424     case OMPD_distribute_parallel_for:
11425     case OMPD_distribute_parallel_for_simd:
11426     case OMPD_teams_distribute:
11427     case OMPD_teams_distribute_simd:
11428     case OMPD_teams_distribute_parallel_for:
11429     case OMPD_teams_distribute_parallel_for_simd:
11430     case OMPD_declare_simd:
11431     case OMPD_declare_variant:
11432     case OMPD_begin_declare_variant:
11433     case OMPD_end_declare_variant:
11434     case OMPD_declare_target:
11435     case OMPD_end_declare_target:
11436     case OMPD_declare_reduction:
11437     case OMPD_declare_mapper:
11438     case OMPD_taskloop:
11439     case OMPD_taskloop_simd:
11440     case OMPD_master_taskloop:
11441     case OMPD_master_taskloop_simd:
11442     case OMPD_parallel_master_taskloop:
11443     case OMPD_parallel_master_taskloop_simd:
11444     case OMPD_target:
11445     case OMPD_target_simd:
11446     case OMPD_target_teams_distribute:
11447     case OMPD_target_teams_distribute_simd:
11448     case OMPD_target_teams_distribute_parallel_for:
11449     case OMPD_target_teams_distribute_parallel_for_simd:
11450     case OMPD_target_teams:
11451     case OMPD_target_parallel:
11452     case OMPD_target_parallel_for:
11453     case OMPD_target_parallel_for_simd:
11454     case OMPD_requires:
11455     case OMPD_metadirective:
11456     case OMPD_unknown:
11457     default:
11458       llvm_unreachable("Unexpected standalone target data directive.");
11459       break;
11460     }
11461     CGF.EmitRuntimeCall(
11462         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11463         OffloadingArgs);
11464   };
11465 
11466   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11467                           &MapNamesArray](CodeGenFunction &CGF,
11468                                           PrePostActionTy &) {
11469     // Fill up the arrays with all the mapped variables.
11470     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11471 
11472     // Get map clause information.
11473     MappableExprsHandler MEHandler(D, CGF);
11474     MEHandler.generateAllInfo(CombinedInfo);
11475 
11476     TargetDataInfo Info;
11477     // Fill up the arrays and create the arguments.
11478     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11479                          /*IsNonContiguous=*/true);
11480     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11481                              D.hasClausesOfKind<OMPNowaitClause>();
11482     emitOffloadingArraysArgument(
11483         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11484         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11485         {/*ForEndCall=*/false});
11486     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11487     InputInfo.BasePointersArray =
11488         Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
11489     InputInfo.PointersArray =
11490         Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
11491     InputInfo.SizesArray =
11492         Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
11493     InputInfo.MappersArray =
11494         Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
11495     MapTypesArray = Info.MapTypesArray;
11496     MapNamesArray = Info.MapNamesArray;
11497     if (RequiresOuterTask)
11498       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11499     else
11500       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11501   };
11502 
11503   if (IfCond) {
11504     emitIfClause(CGF, IfCond, TargetThenGen,
11505                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11506   } else {
11507     RegionCodeGenTy ThenRCG(TargetThenGen);
11508     ThenRCG(CGF);
11509   }
11510 }
11511 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive:
  /// linear (optionally with a variable stride), uniform, or vector
  /// (the default mapping).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// How the parameter maps into the vector variants; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// For Linear, the (rescaled) step; for LinearWithVarStride, the
    /// position of the parameter holding the stride.
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause; mangled only when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
11522 
11523 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11524                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11525   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11526   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11527   // of that clause. The VLEN value must be power of 2.
11528   // In other case the notion of the function`s "characteristic data type" (CDT)
11529   // is used to compute the vector length.
11530   // CDT is defined in the following order:
11531   //   a) For non-void function, the CDT is the return type.
11532   //   b) If the function has any non-uniform, non-linear parameters, then the
11533   //   CDT is the type of the first such parameter.
11534   //   c) If the CDT determined by a) or b) above is struct, union, or class
11535   //   type which is pass-by-value (except for the type that maps to the
11536   //   built-in complex data type), the characteristic data type is int.
11537   //   d) If none of the above three cases is applicable, the CDT is int.
11538   // The VLEN is then determined based on the CDT and the size of vector
11539   // register of that ISA for which current vector version is generated. The
11540   // VLEN is computed using the formula below:
11541   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11542   // where vector register size specified in section 3.2.1 Registers and the
11543   // Stack Frame of original AMD64 ABI document.
11544   QualType RetType = FD->getReturnType();
11545   if (RetType.isNull())
11546     return 0;
11547   ASTContext &C = FD->getASTContext();
11548   QualType CDT;
11549   if (!RetType.isNull() && !RetType->isVoidType()) {
11550     CDT = RetType;
11551   } else {
11552     unsigned Offset = 0;
11553     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11554       if (ParamAttrs[Offset].Kind == Vector)
11555         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11556       ++Offset;
11557     }
11558     if (CDT.isNull()) {
11559       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11560         if (ParamAttrs[I + Offset].Kind == Vector) {
11561           CDT = FD->getParamDecl(I)->getType();
11562           break;
11563         }
11564       }
11565     }
11566   }
11567   if (CDT.isNull())
11568     CDT = C.IntTy;
11569   CDT = CDT->getCanonicalTypeUnqualified();
11570   if (CDT->isRecordType() || CDT->isUnionType())
11571     CDT = C.IntTy;
11572   return C.getTypeSize(CDT);
11573 }
11574 
11575 static void
11576 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11577                            const llvm::APSInt &VLENVal,
11578                            ArrayRef<ParamAttrTy> ParamAttrs,
11579                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11580   struct ISADataTy {
11581     char ISA;
11582     unsigned VecRegSize;
11583   };
11584   ISADataTy ISAData[] = {
11585       {
11586           'b', 128
11587       }, // SSE
11588       {
11589           'c', 256
11590       }, // AVX
11591       {
11592           'd', 256
11593       }, // AVX2
11594       {
11595           'e', 512
11596       }, // AVX512
11597   };
11598   llvm::SmallVector<char, 2> Masked;
11599   switch (State) {
11600   case OMPDeclareSimdDeclAttr::BS_Undefined:
11601     Masked.push_back('N');
11602     Masked.push_back('M');
11603     break;
11604   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11605     Masked.push_back('N');
11606     break;
11607   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11608     Masked.push_back('M');
11609     break;
11610   }
11611   for (char Mask : Masked) {
11612     for (const ISADataTy &Data : ISAData) {
11613       SmallString<256> Buffer;
11614       llvm::raw_svector_ostream Out(Buffer);
11615       Out << "_ZGV" << Data.ISA << Mask;
11616       if (!VLENVal) {
11617         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11618         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11619         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11620       } else {
11621         Out << VLENVal;
11622       }
11623       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11624         switch (ParamAttr.Kind){
11625         case LinearWithVarStride:
11626           Out << 's' << ParamAttr.StrideOrArg;
11627           break;
11628         case Linear:
11629           Out << 'l';
11630           if (ParamAttr.StrideOrArg != 1)
11631             Out << ParamAttr.StrideOrArg;
11632           break;
11633         case Uniform:
11634           Out << 'u';
11635           break;
11636         case Vector:
11637           Out << 'v';
11638           break;
11639         }
11640         if (!!ParamAttr.Alignment)
11641           Out << 'a' << ParamAttr.Alignment;
11642       }
11643       Out << '_' << Fn->getName();
11644       Fn->addFnAttr(Out.str());
11645     }
11646   }
11647 }
11648 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11654 
11655 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11656 ///
11657 /// TODO: Need to implement the behavior for reference marked with a
11658 /// var or no linear modifiers (1.b in the section). For this, we
11659 /// need to extend ParamKindTy to support the linear modifiers.
11660 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11661   QT = QT.getCanonicalType();
11662 
11663   if (QT->isVoidType())
11664     return false;
11665 
11666   if (Kind == ParamKindTy::Uniform)
11667     return false;
11668 
11669   if (Kind == ParamKindTy::Linear)
11670     return false;
11671 
11672   // TODO: Handle linear references with modifiers
11673 
11674   if (Kind == ParamKindTy::LinearWithVarStride)
11675     return false;
11676 
11677   return true;
11678 }
11679 
11680 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11681 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11682   QT = QT.getCanonicalType();
11683   unsigned Size = C.getTypeSize(QT);
11684 
11685   // Only scalars and complex within 16 bytes wide set PVB to true.
11686   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11687     return false;
11688 
11689   if (QT->isFloatingType())
11690     return true;
11691 
11692   if (QT->isIntegerType())
11693     return true;
11694 
11695   if (QT->isPointerType())
11696     return true;
11697 
11698   // TODO: Add support for complex types (section 3.1.2, item 2).
11699 
11700   return false;
11701 }
11702 
11703 /// Computes the lane size (LS) of a return type or of an input parameter,
11704 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11705 /// TODO: Add support for references, section 3.2.1, item 1.
11706 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11707   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11708     QualType PTy = QT.getCanonicalType()->getPointeeType();
11709     if (getAArch64PBV(PTy, C))
11710       return C.getTypeSize(PTy);
11711   }
11712   if (getAArch64PBV(QT, C))
11713     return C.getTypeSize(QT);
11714 
11715   return C.getTypeSize(C.getUIntPtrType());
11716 }
11717 
11718 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11719 // signature of the scalar function, as defined in 3.2.2 of the
11720 // AAVFABI.
11721 static std::tuple<unsigned, unsigned, bool>
11722 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11723   QualType RetType = FD->getReturnType().getCanonicalType();
11724 
11725   ASTContext &C = FD->getASTContext();
11726 
11727   bool OutputBecomesInput = false;
11728 
11729   llvm::SmallVector<unsigned, 8> Sizes;
11730   if (!RetType->isVoidType()) {
11731     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11732     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11733       OutputBecomesInput = true;
11734   }
11735   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11736     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11737     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11738   }
11739 
11740   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11741   // The LS of a function parameter / return value can only be a power
11742   // of 2, starting from 8 bits, up to 128.
11743   assert(llvm::all_of(Sizes,
11744                       [](unsigned Size) {
11745                         return Size == 8 || Size == 16 || Size == 32 ||
11746                                Size == 64 || Size == 128;
11747                       }) &&
11748          "Invalid size");
11749 
11750   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11751                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11752                          OutputBecomesInput);
11753 }
11754 
11755 /// Mangle the parameter part of the vector function name according to
11756 /// their OpenMP classification. The mangling function is defined in
11757 /// section 3.5 of the AAVFABI.
11758 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11759   SmallString<256> Buffer;
11760   llvm::raw_svector_ostream Out(Buffer);
11761   for (const auto &ParamAttr : ParamAttrs) {
11762     switch (ParamAttr.Kind) {
11763     case LinearWithVarStride:
11764       Out << "ls" << ParamAttr.StrideOrArg;
11765       break;
11766     case Linear:
11767       Out << 'l';
11768       // Don't print the step value if it is not present or if it is
11769       // equal to 1.
11770       if (ParamAttr.StrideOrArg != 1)
11771         Out << ParamAttr.StrideOrArg;
11772       break;
11773     case Uniform:
11774       Out << 'u';
11775       break;
11776     case Vector:
11777       Out << 'v';
11778       break;
11779     }
11780 
11781     if (!!ParamAttr.Alignment)
11782       Out << 'a' << ParamAttr.Alignment;
11783   }
11784 
11785   return std::string(Out.str());
11786 }
11787 
11788 // Function used to add the attribute. The parameter `VLEN` is
11789 // templated to allow the use of "x" when targeting scalable functions
11790 // for SVE.
11791 template <typename T>
11792 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11793                                  char ISA, StringRef ParSeq,
11794                                  StringRef MangledName, bool OutputBecomesInput,
11795                                  llvm::Function *Fn) {
11796   SmallString<256> Buffer;
11797   llvm::raw_svector_ostream Out(Buffer);
11798   Out << Prefix << ISA << LMask << VLEN;
11799   if (OutputBecomesInput)
11800     Out << "v";
11801   Out << ParSeq << "_" << MangledName;
11802   Fn->addFnAttr(Out.str());
11803 }
11804 
11805 // Helper function to generate the Advanced SIMD names depending on
11806 // the value of the NDS when simdlen is not present.
11807 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11808                                       StringRef Prefix, char ISA,
11809                                       StringRef ParSeq, StringRef MangledName,
11810                                       bool OutputBecomesInput,
11811                                       llvm::Function *Fn) {
11812   switch (NDS) {
11813   case 8:
11814     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11815                          OutputBecomesInput, Fn);
11816     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11817                          OutputBecomesInput, Fn);
11818     break;
11819   case 16:
11820     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11821                          OutputBecomesInput, Fn);
11822     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11823                          OutputBecomesInput, Fn);
11824     break;
11825   case 32:
11826     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11827                          OutputBecomesInput, Fn);
11828     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11829                          OutputBecomesInput, Fn);
11830     break;
11831   case 64:
11832   case 128:
11833     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11834                          OutputBecomesInput, Fn);
11835     break;
11836   default:
11837     llvm_unreachable("Scalar type is too wide.");
11838   }
11839 }
11840 
11841 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11842 static void emitAArch64DeclareSimdFunction(
11843     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11844     ArrayRef<ParamAttrTy> ParamAttrs,
11845     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11846     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11847 
11848   // Get basic data for building the vector signature.
11849   const auto Data = getNDSWDS(FD, ParamAttrs);
11850   const unsigned NDS = std::get<0>(Data);
11851   const unsigned WDS = std::get<1>(Data);
11852   const bool OutputBecomesInput = std::get<2>(Data);
11853 
11854   // Check the values provided via `simdlen` by the user.
11855   // 1. A `simdlen(1)` doesn't produce vector signatures,
11856   if (UserVLEN == 1) {
11857     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11858         DiagnosticsEngine::Warning,
11859         "The clause simdlen(1) has no effect when targeting aarch64.");
11860     CGM.getDiags().Report(SLoc, DiagID);
11861     return;
11862   }
11863 
11864   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11865   // Advanced SIMD output.
11866   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11867     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11868         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11869                                     "power of 2 when targeting Advanced SIMD.");
11870     CGM.getDiags().Report(SLoc, DiagID);
11871     return;
11872   }
11873 
11874   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11875   // limits.
11876   if (ISA == 's' && UserVLEN != 0) {
11877     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11878       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11879           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11880                                       "lanes in the architectural constraints "
11881                                       "for SVE (min is 128-bit, max is "
11882                                       "2048-bit, by steps of 128-bit)");
11883       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11884       return;
11885     }
11886   }
11887 
11888   // Sort out parameter sequence.
11889   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11890   StringRef Prefix = "_ZGV";
11891   // Generate simdlen from user input (if any).
11892   if (UserVLEN) {
11893     if (ISA == 's') {
11894       // SVE generates only a masked function.
11895       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11896                            OutputBecomesInput, Fn);
11897     } else {
11898       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11899       // Advanced SIMD generates one or two functions, depending on
11900       // the `[not]inbranch` clause.
11901       switch (State) {
11902       case OMPDeclareSimdDeclAttr::BS_Undefined:
11903         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11904                              OutputBecomesInput, Fn);
11905         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11906                              OutputBecomesInput, Fn);
11907         break;
11908       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11909         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11910                              OutputBecomesInput, Fn);
11911         break;
11912       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11913         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11914                              OutputBecomesInput, Fn);
11915         break;
11916       }
11917     }
11918   } else {
11919     // If no user simdlen is provided, follow the AAVFABI rules for
11920     // generating the vector length.
11921     if (ISA == 's') {
11922       // SVE, section 3.4.1, item 1.
11923       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11924                            OutputBecomesInput, Fn);
11925     } else {
11926       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11927       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11928       // two vector names depending on the use of the clause
11929       // `[not]inbranch`.
11930       switch (State) {
11931       case OMPDeclareSimdDeclAttr::BS_Undefined:
11932         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11933                                   OutputBecomesInput, Fn);
11934         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11935                                   OutputBecomesInput, Fn);
11936         break;
11937       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11938         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11939                                   OutputBecomesInput, Fn);
11940         break;
11941       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11942         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11943                                   OutputBecomesInput, Fn);
11944         break;
11945       }
11946     }
11947   }
11948 }
11949 
11950 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11951                                               llvm::Function *Fn) {
11952   ASTContext &C = CGM.getContext();
11953   FD = FD->getMostRecentDecl();
11954   // Map params to their positions in function decl.
11955   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11956   if (isa<CXXMethodDecl>(FD))
11957     ParamPositions.try_emplace(FD, 0);
11958   unsigned ParamPos = ParamPositions.size();
11959   for (const ParmVarDecl *P : FD->parameters()) {
11960     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11961     ++ParamPos;
11962   }
11963   while (FD) {
11964     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11965       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11966       // Mark uniform parameters.
11967       for (const Expr *E : Attr->uniforms()) {
11968         E = E->IgnoreParenImpCasts();
11969         unsigned Pos;
11970         if (isa<CXXThisExpr>(E)) {
11971           Pos = ParamPositions[FD];
11972         } else {
11973           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11974                                 ->getCanonicalDecl();
11975           Pos = ParamPositions[PVD];
11976         }
11977         ParamAttrs[Pos].Kind = Uniform;
11978       }
11979       // Get alignment info.
11980       auto NI = Attr->alignments_begin();
11981       for (const Expr *E : Attr->aligneds()) {
11982         E = E->IgnoreParenImpCasts();
11983         unsigned Pos;
11984         QualType ParmTy;
11985         if (isa<CXXThisExpr>(E)) {
11986           Pos = ParamPositions[FD];
11987           ParmTy = E->getType();
11988         } else {
11989           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11990                                 ->getCanonicalDecl();
11991           Pos = ParamPositions[PVD];
11992           ParmTy = PVD->getType();
11993         }
11994         ParamAttrs[Pos].Alignment =
11995             (*NI)
11996                 ? (*NI)->EvaluateKnownConstInt(C)
11997                 : llvm::APSInt::getUnsigned(
11998                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11999                           .getQuantity());
12000         ++NI;
12001       }
12002       // Mark linear parameters.
12003       auto SI = Attr->steps_begin();
12004       auto MI = Attr->modifiers_begin();
12005       for (const Expr *E : Attr->linears()) {
12006         E = E->IgnoreParenImpCasts();
12007         unsigned Pos;
12008         // Rescaling factor needed to compute the linear parameter
12009         // value in the mangled name.
12010         unsigned PtrRescalingFactor = 1;
12011         if (isa<CXXThisExpr>(E)) {
12012           Pos = ParamPositions[FD];
12013         } else {
12014           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12015                                 ->getCanonicalDecl();
12016           Pos = ParamPositions[PVD];
12017           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12018             PtrRescalingFactor = CGM.getContext()
12019                                      .getTypeSizeInChars(P->getPointeeType())
12020                                      .getQuantity();
12021         }
12022         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12023         ParamAttr.Kind = Linear;
12024         // Assuming a stride of 1, for `linear` without modifiers.
12025         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12026         if (*SI) {
12027           Expr::EvalResult Result;
12028           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12029             if (const auto *DRE =
12030                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12031               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12032                 ParamAttr.Kind = LinearWithVarStride;
12033                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12034                     ParamPositions[StridePVD->getCanonicalDecl()]);
12035               }
12036             }
12037           } else {
12038             ParamAttr.StrideOrArg = Result.Val.getInt();
12039           }
12040         }
12041         // If we are using a linear clause on a pointer, we need to
12042         // rescale the value of linear_step with the byte size of the
12043         // pointee type.
12044         if (Linear == ParamAttr.Kind)
12045           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12046         ++SI;
12047         ++MI;
12048       }
12049       llvm::APSInt VLENVal;
12050       SourceLocation ExprLoc;
12051       const Expr *VLENExpr = Attr->getSimdlen();
12052       if (VLENExpr) {
12053         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12054         ExprLoc = VLENExpr->getExprLoc();
12055       }
12056       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12057       if (CGM.getTriple().isX86()) {
12058         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12059       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12060         unsigned VLEN = VLENVal.getExtValue();
12061         StringRef MangledName = Fn->getName();
12062         if (CGM.getTarget().hasFeature("sve"))
12063           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12064                                          MangledName, 's', 128, Fn, ExprLoc);
12065         if (CGM.getTarget().hasFeature("neon"))
12066           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12067                                          MangledName, 'n', 128, Fn, ExprLoc);
12068       }
12069     }
12070     FD = FD->getPreviousDecl();
12071   }
12072 }
12073 
12074 namespace {
12075 /// Cleanup action for doacross support.
12076 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12077 public:
12078   static const int DoacrossFinArgs = 2;
12079 
12080 private:
12081   llvm::FunctionCallee RTLFn;
12082   llvm::Value *Args[DoacrossFinArgs];
12083 
12084 public:
12085   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12086                     ArrayRef<llvm::Value *> CallArgs)
12087       : RTLFn(RTLFn) {
12088     assert(CallArgs.size() == DoacrossFinArgs);
12089     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12090   }
12091   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12092     if (!CGF.HaveInsertPoint())
12093       return;
12094     CGF.EmitRuntimeCall(RTLFn, Args);
12095   }
12096 };
12097 } // namespace
12098 
/// Emits initialization for a doacross loop nest:
///  - lazily builds the kmp_dim record type {lo, up, st} (cached in KmpDimTy),
///  - fills a stack array of kmp_dim entries with the iteration counts,
///  - calls __kmpc_doacross_init, and
///  - registers a cleanup that calls __kmpc_doacross_fini at region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // One kmp_dim entry per dimension; zero-initialize so 'lo' stays 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) for both the normal-exit and
  // exception-unwinding paths of the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12169 
12170 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12171                                           const OMPDependClause *C) {
12172   QualType Int64Ty =
12173       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12174   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12175   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12176       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12177   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12178   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12179     const Expr *CounterVal = C->getLoopData(I);
12180     assert(CounterVal);
12181     llvm::Value *CntVal = CGF.EmitScalarConversion(
12182         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12183         CounterVal->getExprLoc());
12184     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12185                           /*Volatile=*/false, Int64Ty);
12186   }
12187   llvm::Value *Args[] = {
12188       emitUpdateLocation(CGF, C->getBeginLoc()),
12189       getThreadID(CGF, C->getBeginLoc()),
12190       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12191   llvm::FunctionCallee RTLFn;
12192   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12193     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12194                                                   OMPRTL___kmpc_doacross_post);
12195   } else {
12196     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12197     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12198                                                   OMPRTL___kmpc_doacross_wait);
12199   }
12200   CGF.EmitRuntimeCall(RTLFn, Args);
12201 }
12202 
12203 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12204                                llvm::FunctionCallee Callee,
12205                                ArrayRef<llvm::Value *> Args) const {
12206   assert(Loc.isValid() && "Outlined function call location must be valid.");
12207   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12208 
12209   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12210     if (Fn->doesNotThrow()) {
12211       CGF.EmitNounwindRuntimeCall(Fn, Args);
12212       return;
12213     }
12214   }
12215   CGF.EmitRuntimeCall(Callee, Args);
12216 }
12217 
/// Emits a call to the given outlined function. Delegates to emitCall, which
/// applies an artificial debug location and prefers nounwind emission.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12223 
12224 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12225   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12226     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12227       HasEmittedDeclareTargetRegion = true;
12228 }
12229 
/// Base implementation: returns the local address of \p NativeParam and
/// ignores \p TargetParam entirely.
// NOTE(review): presumably device-specific runtimes override this to map the
// native parameter onto the target one — confirm against subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12235 
12236 /// Return allocator value from expression, or return a null allocator (default
12237 /// when no allocator specified).
12238 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12239                                     const Expr *Allocator) {
12240   llvm::Value *AllocVal;
12241   if (Allocator) {
12242     AllocVal = CGF.EmitScalarExpr(Allocator);
12243     // According to the standard, the original allocator type is a enum
12244     // (integer). Convert to pointer type, if required.
12245     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12246                                         CGF.getContext().VoidPtrTy,
12247                                         Allocator->getExprLoc());
12248   } else {
12249     // If no allocator specified, it defaults to the null allocator.
12250     AllocVal = llvm::Constant::getNullValue(
12251         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12252   }
12253   return AllocVal;
12254 }
12255 
/// Returns a runtime-managed address for local variable \p VD:
///  - if VD is a local of an untied task, the addresses recorded for the
///    current function in UntiedLocalVarsStack are used;
///  - if VD carries an OMPAllocateDeclAttr, storage is obtained through
///    __kmpc_alloc/__kmpc_aligned_alloc and released by a pushed cleanup
///    that calls __kmpc_free.
/// Otherwise returns UntiedAddr, which is Address::invalid() when neither
/// case applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up VD in the untied-locals map of the current function, if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified type: the size is a runtime value.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: round the size up to the declared alignment.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    // Optional alignment expression from the attribute, cast to size_t.
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    // Use the aligned entry point only when an explicit alignment was given.
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr = UntiedRealAddr.isValid()
                         ? UntiedRealAddr
                         : Address::deprecated(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12358 
12359 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12360                                              const VarDecl *VD) const {
12361   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12362   if (It == FunctionToUntiedTaskStackMap.end())
12363     return false;
12364   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12365 }
12366 
12367 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12368     CodeGenModule &CGM, const OMPLoopDirective &S)
12369     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12370   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12371   if (!NeedToPush)
12372     return;
12373   NontemporalDeclsSet &DS =
12374       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12375   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12376     for (const Stmt *Ref : C->private_refs()) {
12377       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12378       const ValueDecl *VD;
12379       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12380         VD = DRE->getDecl();
12381       } else {
12382         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12383         assert((ME->isImplicitCXXThis() ||
12384                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12385                "Expected member of current class.");
12386         VD = ME->getMemberDecl();
12387       }
12388       DS.insert(VD);
12389     }
12390   }
12391 }
12392 
12393 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12394   if (!NeedToPush)
12395     return;
12396   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12397 }
12398 
12399 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12400     CodeGenFunction &CGF,
12401     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12402                           std::pair<Address, Address>> &LocalVars)
12403     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12404   if (!NeedToPush)
12405     return;
12406   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12407       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12408   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12409 }
12410 
12411 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12412   if (!NeedToPush)
12413     return;
12414   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12415 }
12416 
12417 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12418   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12419 
12420   return llvm::any_of(
12421       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12422       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12423 }
12424 
12425 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12426     const OMPExecutableDirective &S,
12427     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12428     const {
12429   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12430   // Vars in target/task regions must be excluded completely.
12431   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12432       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12433     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12434     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12435     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12436     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12437       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12438         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12439     }
12440   }
12441   // Exclude vars in private clauses.
12442   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12443     for (const Expr *Ref : C->varlists()) {
12444       if (!Ref->getType()->isScalarType())
12445         continue;
12446       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12447       if (!DRE)
12448         continue;
12449       NeedToCheckForLPCs.insert(DRE->getDecl());
12450     }
12451   }
12452   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12453     for (const Expr *Ref : C->varlists()) {
12454       if (!Ref->getType()->isScalarType())
12455         continue;
12456       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12457       if (!DRE)
12458         continue;
12459       NeedToCheckForLPCs.insert(DRE->getDecl());
12460     }
12461   }
12462   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12463     for (const Expr *Ref : C->varlists()) {
12464       if (!Ref->getType()->isScalarType())
12465         continue;
12466       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12467       if (!DRE)
12468         continue;
12469       NeedToCheckForLPCs.insert(DRE->getDecl());
12470     }
12471   }
12472   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12473     for (const Expr *Ref : C->varlists()) {
12474       if (!Ref->getType()->isScalarType())
12475         continue;
12476       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12477       if (!DRE)
12478         continue;
12479       NeedToCheckForLPCs.insert(DRE->getDecl());
12480     }
12481   }
12482   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12483     for (const Expr *Ref : C->varlists()) {
12484       if (!Ref->getType()->isScalarType())
12485         continue;
12486       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12487       if (!DRE)
12488         continue;
12489       NeedToCheckForLPCs.insert(DRE->getDecl());
12490     }
12491   }
12492   for (const Decl *VD : NeedToCheckForLPCs) {
12493     for (const LastprivateConditionalData &Data :
12494          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12495       if (Data.DeclToUniqueName.count(VD) > 0) {
12496         if (!Data.Disabled)
12497           NeedToAddForLPCsAsDisabled.insert(VD);
12498         break;
12499       }
12500     }
12501   }
12502 }
12503 
/// Pushes a lastprivate-conditional region for \p S if it has at least one
/// 'lastprivate(conditional: ...)' clause and the OpenMP version is >= 5.0.
/// Each listed declaration is mapped to a unique "pl_cond" name; the loop
/// iteration variable lvalue and the current function are recorded with it.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Assign each conditional lastprivate decl a unique global name.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12535 
12536 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12537     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12538     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12539   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12540   if (CGM.getLangOpts().OpenMP < 50)
12541     return;
12542   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12543   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12544   if (!NeedToAddForLPCsAsDisabled.empty()) {
12545     Action = ActionToDo::DisableLastprivateConditional;
12546     LastprivateConditionalData &Data =
12547         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12548     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12549       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12550     Data.Fn = CGF.CurFn;
12551     Data.Disabled = true;
12552   }
12553 }
12554 
/// Factory wrapper around the "disabling" constructor: returns an RAII object
/// that suppresses lastprivate-conditional analysis inside \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12560 
12561 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12562   if (CGM.getLangOpts().OpenMP < 50)
12563     return;
12564   if (Action == ActionToDo::DisableLastprivateConditional) {
12565     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12566            "Expected list of disabled private vars.");
12567     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12568   }
12569   if (Action == ActionToDo::PushAsLastprivateConditional) {
12570     assert(
12571         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12572         "Expected list of lastprivate conditional vars.");
12573     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12574   }
12575 }
12576 
/// Creates (or reuses, per function and per variable) the private copy used
/// for a lastprivate conditional variable. The copy is a record with two
/// fields: the value itself and a char 'Fired' flag, which is reset to 0
/// here. Returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the wrapper record and a
    // temporary for it, then cache everything for later lookups.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the cached record type, fields, and base lvalue.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12611 
12612 namespace {
12613 /// Checks if the lastprivate conditional variable is referenced in LHS.
12614 class LastprivateConditionalRefChecker final
12615     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12616   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12617   const Expr *FoundE = nullptr;
12618   const Decl *FoundD = nullptr;
12619   StringRef UniqueDeclName;
12620   LValue IVLVal;
12621   llvm::Function *FoundFn = nullptr;
12622   SourceLocation Loc;
12623 
12624 public:
12625   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12626     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12627          llvm::reverse(LPM)) {
12628       auto It = D.DeclToUniqueName.find(E->getDecl());
12629       if (It == D.DeclToUniqueName.end())
12630         continue;
12631       if (D.Disabled)
12632         return false;
12633       FoundE = E;
12634       FoundD = E->getDecl()->getCanonicalDecl();
12635       UniqueDeclName = It->second;
12636       IVLVal = D.IVLVal;
12637       FoundFn = D.Fn;
12638       break;
12639     }
12640     return FoundE == E;
12641   }
12642   bool VisitMemberExpr(const MemberExpr *E) {
12643     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12644       return false;
12645     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12646          llvm::reverse(LPM)) {
12647       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12648       if (It == D.DeclToUniqueName.end())
12649         continue;
12650       if (D.Disabled)
12651         return false;
12652       FoundE = E;
12653       FoundD = E->getMemberDecl()->getCanonicalDecl();
12654       UniqueDeclName = It->second;
12655       IVLVal = D.IVLVal;
12656       FoundFn = D.Fn;
12657       break;
12658     }
12659     return FoundE == E;
12660   }
12661   bool VisitStmt(const Stmt *S) {
12662     for (const Stmt *Child : S->children()) {
12663       if (!Child)
12664         continue;
12665       if (const auto *E = dyn_cast<Expr>(Child))
12666         if (!E->isGLValue())
12667           continue;
12668       if (Visit(Child))
12669         return true;
12670     }
12671     return false;
12672   }
12673   explicit LastprivateConditionalRefChecker(
12674       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12675       : LPM(LPM) {}
12676   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12677   getFoundData() const {
12678     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12679   }
12680 };
12681 } // namespace
12682 
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Emits the bookkeeping for one conditional-lastprivate variable: two
  // internal globals track the iteration variable value at the last update
  // ("<UniqueDeclName>iv") and the private value at that iteration
  // ("<UniqueDeclName>"); both are refreshed under a last_iv <= iv guard so
  // that the lexically-last iteration wins across concurrent executors.
  //
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  // The body below runs inside a critical region (named after the variable)
  // unless we are in SIMD-only mode, where no other thread can race with it.
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      // Only integral loop iteration variables are valid here.
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Aggregates cannot appear here (rejected earlier), so only scalar and
    // complex evaluation kinds need a copy.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): this ApplyDebugLocation temporary is destroyed at the end
    // of the statement, before EmitBlock runs — confirm the intended scope.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12768 
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // If LHS writes to a lastprivate-conditional variable tracked on the stack,
  // record the update: either directly (same function) via
  // emitLastprivateConditionalUpdate, or, for inner parallel regions, by
  // atomically setting the 'Fired' flag in the wrapper struct so the outer
  // region performs the real update later.
  //
  // Lastprivate conditional is an OpenMP 5.0 feature.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the wrapper struct so the
    // 'Fired' field can be addressed.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Unordered atomic store: another thread may be setting the flag too,
    // but any non-zero value is equivalent.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12811 
12812 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12813     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12814     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12815   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12816     return;
12817   auto Range = llvm::reverse(LastprivateConditionalStack);
12818   auto It = llvm::find_if(
12819       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12820   if (It == Range.end() || It->Fn != CGF.CurFn)
12821     return;
12822   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12823   assert(LPCI != LastprivateConditionalToTypes.end() &&
12824          "Lastprivates must be registered already.");
12825   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12826   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12827   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12828   for (const auto &Pair : It->DeclToUniqueName) {
12829     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12830     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12831       continue;
12832     auto I = LPCI->getSecond().find(Pair.first);
12833     assert(I != LPCI->getSecond().end() &&
12834            "Lastprivate must be rehistered already.");
12835     // bool Cmp = priv_a.Fired != 0;
12836     LValue BaseLVal = std::get<3>(I->getSecond());
12837     LValue FiredLVal =
12838         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12839     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12840     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12841     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12842     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12843     // if (Cmp) {
12844     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12845     CGF.EmitBlock(ThenBB);
12846     Address Addr = CGF.GetAddrOfLocalVar(VD);
12847     LValue LVal;
12848     if (VD->getType()->isReferenceType())
12849       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12850                                            AlignmentSource::Decl);
12851     else
12852       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12853                                 AlignmentSource::Decl);
12854     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12855                                      D.getBeginLoc());
12856     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12857     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12858     // }
12859   }
12860 }
12861 
12862 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12863     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12864     SourceLocation Loc) {
12865   if (CGF.getLangOpts().OpenMP < 50)
12866     return;
12867   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12868   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12869          "Unknown lastprivate conditional variable.");
12870   StringRef UniqueName = It->second;
12871   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12872   // The variable was not updated in the region - exit.
12873   if (!GV)
12874     return;
12875   LValue LPLVal = CGF.MakeAddrLValue(
12876       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12877       PrivLVal.getType().getNonReferenceType());
12878   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12879   CGF.EmitStoreOfScalar(Res, PrivLVal);
12880 }
12881 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: with -fopenmp-simd only simd constructs are honored,
// so Sema never lets these runtime entry points be reached; each one traps.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12901 
// SIMD-only mode (-fopenmp-simd): synchronization and worksharing regions
// cannot be generated; these stubs must never be reached.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12964 
// SIMD-only mode (-fopenmp-simd): loop worksharing runtime calls are never
// emitted; these stubs must not be reached.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13004 
// SIMD-only mode (-fopenmp-simd): thread-management clauses and threadprivate
// storage need the OpenMP runtime library; these stubs must not be reached.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13041 
// SIMD-only mode (-fopenmp-simd): tasking is not supported; these stubs must
// not be reached.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13057 
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  // In SIMD-only mode there are no threads, so only the "simple" sequential
  // reduction lowering is valid; delegate it to the base implementation.
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13066 
// SIMD-only mode (-fopenmp-simd): task reductions and taskwait require the
// OpenMP runtime library; these stubs must not be reached.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13098 
// SIMD-only mode (-fopenmp-simd): cancellation is not supported; these stubs
// must not be reached.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13110 
// SIMD-only mode (-fopenmp-simd): device offloading is not supported; the
// emit* stubs must not be reached, and no symbol is treated as a target
// global.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Nothing is ever registered for a device in SIMD-only mode, so no global
  // requires target-specific emission.
  return false;
}
13139 
// SIMD-only mode (-fopenmp-simd): teams, target data, doacross and parameter
// translation all require the OpenMP runtime; these stubs must not be
// reached.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13190