1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include "llvm/Support/Format.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <numeric>
43 
44 using namespace clang;
45 using namespace CodeGen;
46 using namespace llvm::omp;
47 
48 namespace {
49 /// Base class for handling code generation inside OpenMP regions.
50 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
51 public:
52   /// Kinds of OpenMP regions used in codegen.
53   enum CGOpenMPRegionKind {
54     /// Region with outlined function for standalone 'parallel'
55     /// directive.
56     ParallelOutlinedRegion,
57     /// Region with outlined function for standalone 'task' directive.
58     TaskOutlinedRegion,
59     /// Region for constructs that do not require function outlining,
60     /// like 'for', 'sections', 'atomic' etc. directives.
61     InlinedRegion,
62     /// Region with outlined function for standalone 'target' directive.
63     TargetRegion,
64   };
65 
66   CGOpenMPRegionInfo(const CapturedStmt &CS,
67                      const CGOpenMPRegionKind RegionKind,
68                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
69                      bool HasCancel)
70       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
71         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
72 
73   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
74                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
75                      bool HasCancel)
76       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
77         Kind(Kind), HasCancel(HasCancel) {}
78 
79   /// Get a variable or parameter for storing global thread id
80   /// inside OpenMP construct.
81   virtual const VarDecl *getThreadIDVariable() const = 0;
82 
83   /// Emit the captured statement body.
84   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
85 
86   /// Get an LValue for the current ThreadID variable.
87   /// \return LValue for thread id variable. This LValue always has type int32*.
88   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
89 
90   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
91 
92   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
93 
94   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
95 
96   bool hasCancel() const { return HasCancel; }
97 
98   static bool classof(const CGCapturedStmtInfo *Info) {
99     return Info->getKind() == CR_OpenMP;
100   }
101 
102   ~CGOpenMPRegionInfo() override = default;
103 
104 protected:
105   CGOpenMPRegionKind RegionKind;
106   RegionCodeGenTy CodeGen;
107   OpenMPDirectiveKind Kind;
108   bool HasCancel;
109 };
110 
111 /// API for captured statement code generation in OpenMP constructs.
112 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
113 public:
114   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
115                              const RegionCodeGenTy &CodeGen,
116                              OpenMPDirectiveKind Kind, bool HasCancel,
117                              StringRef HelperName)
118       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
119                            HasCancel),
120         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
121     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
122   }
123 
124   /// Get a variable or parameter for storing global thread id
125   /// inside OpenMP construct.
126   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
127 
128   /// Get the name of the capture helper.
129   StringRef getHelperName() const override { return HelperName; }
130 
131   static bool classof(const CGCapturedStmtInfo *Info) {
132     return CGOpenMPRegionInfo::classof(Info) &&
133            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
134                ParallelOutlinedRegion;
135   }
136 
137 private:
138   /// A variable or parameter storing global thread id for OpenMP
139   /// constructs.
140   const VarDecl *ThreadIDVar;
141   StringRef HelperName;
142 };
143 
144 /// API for captured statement code generation in OpenMP constructs.
145 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
146 public:
147   class UntiedTaskActionTy final : public PrePostActionTy {
148     bool Untied;
149     const VarDecl *PartIDVar;
150     const RegionCodeGenTy UntiedCodeGen;
151     llvm::SwitchInst *UntiedSwitch = nullptr;
152 
153   public:
154     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
155                        const RegionCodeGenTy &UntiedCodeGen)
156         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
157     void Enter(CodeGenFunction &CGF) override {
158       if (Untied) {
159         // Emit task switching point.
160         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
161             CGF.GetAddrOfLocalVar(PartIDVar),
162             PartIDVar->getType()->castAs<PointerType>());
163         llvm::Value *Res =
164             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
165         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
166         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
167         CGF.EmitBlock(DoneBB);
168         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
169         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
170         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
171                               CGF.Builder.GetInsertBlock());
172         emitUntiedSwitch(CGF);
173       }
174     }
175     void emitUntiedSwitch(CodeGenFunction &CGF) const {
176       if (Untied) {
177         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
178             CGF.GetAddrOfLocalVar(PartIDVar),
179             PartIDVar->getType()->castAs<PointerType>());
180         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
181                               PartIdLVal);
182         UntiedCodeGen(CGF);
183         CodeGenFunction::JumpDest CurPoint =
184             CGF.getJumpDestInCurrentScope(".untied.next.");
185         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
186         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
187         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
188                               CGF.Builder.GetInsertBlock());
189         CGF.EmitBranchThroughCleanup(CurPoint);
190         CGF.EmitBlock(CurPoint.getBlock());
191       }
192     }
193     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
194   };
195   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
196                                  const VarDecl *ThreadIDVar,
197                                  const RegionCodeGenTy &CodeGen,
198                                  OpenMPDirectiveKind Kind, bool HasCancel,
199                                  const UntiedTaskActionTy &Action)
200       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
201         ThreadIDVar(ThreadIDVar), Action(Action) {
202     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
203   }
204 
205   /// Get a variable or parameter for storing global thread id
206   /// inside OpenMP construct.
207   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
208 
209   /// Get an LValue for the current ThreadID variable.
210   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
211 
212   /// Get the name of the capture helper.
213   StringRef getHelperName() const override { return ".omp_outlined."; }
214 
215   void emitUntiedSwitch(CodeGenFunction &CGF) override {
216     Action.emitUntiedSwitch(CGF);
217   }
218 
219   static bool classof(const CGCapturedStmtInfo *Info) {
220     return CGOpenMPRegionInfo::classof(Info) &&
221            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
222                TaskOutlinedRegion;
223   }
224 
225 private:
226   /// A variable or parameter storing global thread id for OpenMP
227   /// constructs.
228   const VarDecl *ThreadIDVar;
229   /// Action for emitting code for untied tasks.
230   const UntiedTaskActionTy &Action;
231 };
232 
233 /// API for inlined captured statement code generation in OpenMP
234 /// constructs.
235 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
236 public:
237   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
238                             const RegionCodeGenTy &CodeGen,
239                             OpenMPDirectiveKind Kind, bool HasCancel)
240       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
241         OldCSI(OldCSI),
242         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
243 
244   // Retrieve the value of the context parameter.
245   llvm::Value *getContextValue() const override {
246     if (OuterRegionInfo)
247       return OuterRegionInfo->getContextValue();
248     llvm_unreachable("No context value for inlined OpenMP region");
249   }
250 
251   void setContextValue(llvm::Value *V) override {
252     if (OuterRegionInfo) {
253       OuterRegionInfo->setContextValue(V);
254       return;
255     }
256     llvm_unreachable("No context value for inlined OpenMP region");
257   }
258 
259   /// Lookup the captured field decl for a variable.
260   const FieldDecl *lookup(const VarDecl *VD) const override {
261     if (OuterRegionInfo)
262       return OuterRegionInfo->lookup(VD);
263     // If there is no outer outlined region,no need to lookup in a list of
264     // captured variables, we can use the original one.
265     return nullptr;
266   }
267 
268   FieldDecl *getThisFieldDecl() const override {
269     if (OuterRegionInfo)
270       return OuterRegionInfo->getThisFieldDecl();
271     return nullptr;
272   }
273 
274   /// Get a variable or parameter for storing global thread id
275   /// inside OpenMP construct.
276   const VarDecl *getThreadIDVariable() const override {
277     if (OuterRegionInfo)
278       return OuterRegionInfo->getThreadIDVariable();
279     return nullptr;
280   }
281 
282   /// Get an LValue for the current ThreadID variable.
283   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
284     if (OuterRegionInfo)
285       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
286     llvm_unreachable("No LValue for inlined OpenMP construct");
287   }
288 
289   /// Get the name of the capture helper.
290   StringRef getHelperName() const override {
291     if (auto *OuterRegionInfo = getOldCSI())
292       return OuterRegionInfo->getHelperName();
293     llvm_unreachable("No helper name for inlined OpenMP construct");
294   }
295 
296   void emitUntiedSwitch(CodeGenFunction &CGF) override {
297     if (OuterRegionInfo)
298       OuterRegionInfo->emitUntiedSwitch(CGF);
299   }
300 
301   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
302 
303   static bool classof(const CGCapturedStmtInfo *Info) {
304     return CGOpenMPRegionInfo::classof(Info) &&
305            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
306   }
307 
308   ~CGOpenMPInlinedRegionInfo() override = default;
309 
310 private:
311   /// CodeGen info about outer OpenMP region.
312   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
313   CGOpenMPRegionInfo *OuterRegionInfo;
314 };
315 
316 /// API for captured statement code generation in OpenMP target
317 /// constructs. For this captures, implicit parameters are used instead of the
318 /// captured fields. The name of the target region has to be unique in a given
319 /// application so it is provided by the client, because only the client has
320 /// the information to generate that.
321 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
322 public:
323   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
324                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
325       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
326                            /*HasCancel=*/false),
327         HelperName(HelperName) {}
328 
329   /// This is unused for target regions because each starts executing
330   /// with a single thread.
331   const VarDecl *getThreadIDVariable() const override { return nullptr; }
332 
333   /// Get the name of the capture helper.
334   StringRef getHelperName() const override { return HelperName; }
335 
336   static bool classof(const CGCapturedStmtInfo *Info) {
337     return CGOpenMPRegionInfo::classof(Info) &&
338            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
339   }
340 
341 private:
342   StringRef HelperName;
343 };
344 
/// Placeholder region code generator passed by CGOpenMPInnerExprInfo to its
/// base class. It must never be invoked; reaching it is reported through
/// llvm_unreachable.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
348 /// API for generation of expressions captured in a innermost OpenMP
349 /// region.
350 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
351 public:
352   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
353       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
354                                   OMPD_unknown,
355                                   /*HasCancel=*/false),
356         PrivScope(CGF) {
357     // Make sure the globals captured in the provided statement are local by
358     // using the privatization logic. We assume the same variable is not
359     // captured more than once.
360     for (const auto &C : CS.captures()) {
361       if (!C.capturesVariable() && !C.capturesVariableByCopy())
362         continue;
363 
364       const VarDecl *VD = C.getCapturedVar();
365       if (VD->isLocalVarDeclOrParm())
366         continue;
367 
368       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
369                       /*RefersToEnclosingVariableOrCapture=*/false,
370                       VD->getType().getNonReferenceType(), VK_LValue,
371                       C.getLocation());
372       PrivScope.addPrivate(
373           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
374     }
375     (void)PrivScope.Privatize();
376   }
377 
378   /// Lookup the captured field decl for a variable.
379   const FieldDecl *lookup(const VarDecl *VD) const override {
380     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
381       return FD;
382     return nullptr;
383   }
384 
385   /// Emit the captured statement body.
386   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
387     llvm_unreachable("No body for expressions");
388   }
389 
390   /// Get a variable or parameter for storing global thread id
391   /// inside OpenMP construct.
392   const VarDecl *getThreadIDVariable() const override {
393     llvm_unreachable("No thread id for expressions");
394   }
395 
396   /// Get the name of the capture helper.
397   StringRef getHelperName() const override {
398     llvm_unreachable("No helper name for expressions");
399   }
400 
401   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
402 
403 private:
404   /// Private scope to capture global variables.
405   CodeGenFunction::OMPPrivateScope PrivScope;
406 };
407 
408 /// RAII for emitting code of OpenMP constructs.
409 class InlinedOpenMPRegionRAII {
410   CodeGenFunction &CGF;
411   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
412   FieldDecl *LambdaThisCaptureField = nullptr;
413   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
414   bool NoInheritance = false;
415 
416 public:
417   /// Constructs region for combined constructs.
418   /// \param CodeGen Code generation sequence for combined directives. Includes
419   /// a list of functions used for code generation of implicitly inlined
420   /// regions.
421   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
422                           OpenMPDirectiveKind Kind, bool HasCancel,
423                           bool NoInheritance = true)
424       : CGF(CGF), NoInheritance(NoInheritance) {
425     // Start emission for the construct.
426     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
427         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
428     if (NoInheritance) {
429       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
430       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
431       CGF.LambdaThisCaptureField = nullptr;
432       BlockInfo = CGF.BlockInfo;
433       CGF.BlockInfo = nullptr;
434     }
435   }
436 
437   ~InlinedOpenMPRegionRAII() {
438     // Restore original CapturedStmtInfo only if we're done with code emission.
439     auto *OldCSI =
440         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
441     delete CGF.CapturedStmtInfo;
442     CGF.CapturedStmtInfo = OldCSI;
443     if (NoInheritance) {
444       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
445       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
446       CGF.BlockInfo = BlockInfo;
447     }
448   }
449 };
450 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  // Marks this enum as a bitmask enum; OMP_IDENT_WORK_DISTRIBUTE is the
  // largest individual flag value.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
479 
namespace {
// Enables the bitmask-enum operators for enums declared in this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  // Marks this enum as a bitmask enum; OMP_REQ_DYNAMIC_ALLOCATORS is the
  // largest individual flag value.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ID values used for offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
505 
/// Field indices of the ident structure that describes a source location.
/// The enumerators are listed in the same order as the fields of the
/// original structure below.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
546 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions. Each OMP_ord_* value below equals
  /// the matching OMP_sch_* value plus 32.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule; an alias for OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
578 
579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580 /// region.
581 class CleanupTy final : public EHScopeStack::Cleanup {
582   PrePostActionTy *Action;
583 
584 public:
585   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587     if (!CGF.HaveInsertPoint())
588       return;
589     Action->Exit(CGF);
590   }
591 };
592 
593 } // anonymous namespace
594 
595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596   CodeGenFunction::RunCleanupsScope Scope(CGF);
597   if (PrePostAction) {
598     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599     Callback(CodeGen, CGF, *PrePostAction);
600   } else {
601     PrePostActionTy Action;
602     Callback(CodeGen, CGF, Action);
603   }
604 }
605 
606 /// Check if the combiner is a call to UDR combiner and if it is so return the
607 /// UDR decl used for reduction.
608 static const OMPDeclareReductionDecl *
609 getReductionInit(const Expr *ReductionOp) {
610   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612       if (const auto *DRE =
613               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615           return DRD;
616   return nullptr;
617 }
618 
619 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
620                                              const OMPDeclareReductionDecl *DRD,
621                                              const Expr *InitOp,
622                                              Address Private, Address Original,
623                                              QualType Ty) {
624   if (DRD->getInitializer()) {
625     std::pair<llvm::Function *, llvm::Function *> Reduction =
626         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
627     const auto *CE = cast<CallExpr>(InitOp);
628     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
629     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
630     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
631     const auto *LHSDRE =
632         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
633     const auto *RHSDRE =
634         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
635     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
636     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
637                             [=]() { return Private; });
638     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
639                             [=]() { return Original; });
640     (void)PrivateScope.Privatize();
641     RValue Func = RValue::get(Reduction.second);
642     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
643     CGF.EmitIgnoredExpr(InitOp);
644   } else {
645     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
646     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
647     auto *GV = new llvm::GlobalVariable(
648         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
649         llvm::GlobalValue::PrivateLinkage, Init, Name);
650     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
651     RValue InitRVal;
652     switch (CGF.getEvaluationKind(Ty)) {
653     case TEK_Scalar:
654       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
655       break;
656     case TEK_Complex:
657       InitRVal =
658           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
659       break;
660     case TEK_Aggregate: {
661       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
662       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
663       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
664                            /*IsInitializer=*/false);
665       return;
666     }
667     }
668     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
669     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
670     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
671                          /*IsInitializer=*/false);
672   }
673 }
674 
675 /// Emit initialization of arrays of complex types.
676 /// \param DestAddr Address of the array.
677 /// \param Type Type of array.
678 /// \param Init Initial expression of array.
679 /// \param SrcAddr Address of the original array.
680 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
681                                  QualType Type, bool EmitDeclareReductionInit,
682                                  const Expr *Init,
683                                  const OMPDeclareReductionDecl *DRD,
684                                  Address SrcAddr = Address::invalid()) {
685   // Perform element-by-element initialization.
686   QualType ElementTy;
687 
688   // Drill down to the base element type on both arrays.
689   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
690   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
691   DestAddr =
692       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
693   if (DRD)
694     SrcAddr =
695         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
696 
697   llvm::Value *SrcBegin = nullptr;
698   if (DRD)
699     SrcBegin = SrcAddr.getPointer();
700   llvm::Value *DestBegin = DestAddr.getPointer();
701   // Cast from pointer to array type to pointer to single element.
702   llvm::Value *DestEnd =
703       CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
704   // The basic structure here is a while-do loop.
705   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
706   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
707   llvm::Value *IsEmpty =
708       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
709   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
710 
711   // Enter the loop body, making that address the current address.
712   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
713   CGF.EmitBlock(BodyBB);
714 
715   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
716 
717   llvm::PHINode *SrcElementPHI = nullptr;
718   Address SrcElementCurrent = Address::invalid();
719   if (DRD) {
720     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
721                                           "omp.arraycpy.srcElementPast");
722     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
723     SrcElementCurrent =
724         Address(SrcElementPHI,
725                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
726   }
727   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
728       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
729   DestElementPHI->addIncoming(DestBegin, EntryBB);
730   Address DestElementCurrent =
731       Address(DestElementPHI,
732               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
733 
734   // Emit copy.
735   {
736     CodeGenFunction::RunCleanupsScope InitScope(CGF);
737     if (EmitDeclareReductionInit) {
738       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
739                                        SrcElementCurrent, ElementTy);
740     } else
741       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
742                            /*IsInitializer=*/false);
743   }
744 
745   if (DRD) {
746     // Shift the address forward by one element.
747     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
748         SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
749         "omp.arraycpy.dest.element");
750     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
751   }
752 
753   // Shift the address forward by one element.
754   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
755       DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
756       "omp.arraycpy.dest.element");
757   // Check whether we've reached the end.
758   llvm::Value *Done =
759       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
760   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
761   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
762 
763   // Done.
764   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
765 }
766 
767 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
768   return CGF.EmitOMPSharedLValue(E);
769 }
770 
771 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
772                                             const Expr *E) {
773   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
774     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
775   return LValue();
776 }
777 
778 void ReductionCodeGen::emitAggregateInitialization(
779     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
780     const OMPDeclareReductionDecl *DRD) {
781   // Emit VarDecl with copy init for arrays.
782   // Get the address of the original variable captured in current
783   // captured region.
784   const auto *PrivateVD =
785       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
786   bool EmitDeclareReductionInit =
787       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
788   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
789                        EmitDeclareReductionInit,
790                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
791                                                 : PrivateVD->getInit(),
792                        DRD, SharedLVal.getAddress(CGF));
793 }
794 
795 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
796                                    ArrayRef<const Expr *> Origs,
797                                    ArrayRef<const Expr *> Privates,
798                                    ArrayRef<const Expr *> ReductionOps) {
799   ClausesData.reserve(Shareds.size());
800   SharedAddresses.reserve(Shareds.size());
801   Sizes.reserve(Shareds.size());
802   BaseDecls.reserve(Shareds.size());
803   const auto *IOrig = Origs.begin();
804   const auto *IPriv = Privates.begin();
805   const auto *IRed = ReductionOps.begin();
806   for (const Expr *Ref : Shareds) {
807     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
808     std::advance(IOrig, 1);
809     std::advance(IPriv, 1);
810     std::advance(IRed, 1);
811   }
812 }
813 
814 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
815   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
816          "Number of generated lvalues must be exactly N.");
817   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
818   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
819   SharedAddresses.emplace_back(First, Second);
820   if (ClausesData[N].Shared == ClausesData[N].Ref) {
821     OrigAddresses.emplace_back(First, Second);
822   } else {
823     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
824     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
825     OrigAddresses.emplace_back(First, Second);
826   }
827 }
828 
829 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
830   const auto *PrivateVD =
831       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
832   QualType PrivateType = PrivateVD->getType();
833   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
834   if (!PrivateType->isVariablyModifiedType()) {
835     Sizes.emplace_back(
836         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
837         nullptr);
838     return;
839   }
840   llvm::Value *Size;
841   llvm::Value *SizeInChars;
842   auto *ElemType =
843       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
844           ->getElementType();
845   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
846   if (AsArraySection) {
847     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
848                                      OrigAddresses[N].first.getPointer(CGF));
849     Size = CGF.Builder.CreateNUWAdd(
850         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
851     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
852   } else {
853     SizeInChars =
854         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
855     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
856   }
857   Sizes.emplace_back(SizeInChars, Size);
858   CodeGenFunction::OpaqueValueMapping OpaqueMap(
859       CGF,
860       cast<OpaqueValueExpr>(
861           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
862       RValue::get(Size));
863   CGF.EmitVariablyModifiedType(PrivateType);
864 }
865 
866 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
867                                          llvm::Value *Size) {
868   const auto *PrivateVD =
869       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
870   QualType PrivateType = PrivateVD->getType();
871   if (!PrivateType->isVariablyModifiedType()) {
872     assert(!Size && !Sizes[N].second &&
873            "Size should be nullptr for non-variably modified reduction "
874            "items.");
875     return;
876   }
877   CodeGenFunction::OpaqueValueMapping OpaqueMap(
878       CGF,
879       cast<OpaqueValueExpr>(
880           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
881       RValue::get(Size));
882   CGF.EmitVariablyModifiedType(PrivateType);
883 }
884 
885 void ReductionCodeGen::emitInitialization(
886     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
887     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
888   assert(SharedAddresses.size() > N && "No variable was generated");
889   const auto *PrivateVD =
890       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
891   const OMPDeclareReductionDecl *DRD =
892       getReductionInit(ClausesData[N].ReductionOp);
893   QualType PrivateType = PrivateVD->getType();
894   PrivateAddr = CGF.Builder.CreateElementBitCast(
895       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
896   QualType SharedType = SharedAddresses[N].first.getType();
897   SharedLVal = CGF.MakeAddrLValue(
898       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
899                                        CGF.ConvertTypeForMem(SharedType)),
900       SharedType, SharedAddresses[N].first.getBaseInfo(),
901       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
902   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
903     if (DRD && DRD->getInitializer())
904       (void)DefaultInit(CGF);
905     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
906   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
907     (void)DefaultInit(CGF);
908     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
909                                      PrivateAddr, SharedLVal.getAddress(CGF),
910                                      SharedLVal.getType());
911   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
912              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
913     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
914                          PrivateVD->getType().getQualifiers(),
915                          /*IsInitializer=*/false);
916   }
917 }
918 
919 bool ReductionCodeGen::needCleanups(unsigned N) {
920   const auto *PrivateVD =
921       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922   QualType PrivateType = PrivateVD->getType();
923   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
924   return DTorKind != QualType::DK_none;
925 }
926 
927 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
928                                     Address PrivateAddr) {
929   const auto *PrivateVD =
930       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
931   QualType PrivateType = PrivateVD->getType();
932   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
933   if (needCleanups(N)) {
934     PrivateAddr = CGF.Builder.CreateElementBitCast(
935         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
936     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
937   }
938 }
939 
940 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
941                           LValue BaseLV) {
942   BaseTy = BaseTy.getNonReferenceType();
943   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
944          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
945     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
946       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
947     } else {
948       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
949       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
950     }
951     BaseTy = BaseTy->getPointeeType();
952   }
953   return CGF.MakeAddrLValue(
954       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
955                                        CGF.ConvertTypeForMem(ElTy)),
956       BaseLV.getType(), BaseLV.getBaseInfo(),
957       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
958 }
959 
/// Wrap \p Addr in as many levels of indirection as \p BaseTy has pointer or
/// reference levels above \p ElTy.
///
/// For every pointer/reference level of \p BaseTy (after stripping a
/// top-level reference) that is not the same type as \p ElTy, a memory
/// temporary of that type is created and chained to the previous one. \p Addr
/// is then cast and stored into the innermost temporary and the outermost
/// temporary is returned. If no level needs wrapping, \p Addr is cast to
/// \p BaseLVType and returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary created for the current level (innermost after the loop).
  // TopTmp: temporary created for the previous level.
  // MostTopTmp: temporary created for the first (outermost) level.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the previous level to the new temporary, or remember the new
    // temporary as the outermost one if this is the first level.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast to the element type of the innermost temporary when one exists,
  // otherwise to the caller-provided type.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Terminate the chain with the actual address and hand back its head.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
987 
988 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
989   const VarDecl *OrigVD = nullptr;
990   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
991     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
992     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
993       Base = TempOASE->getBase()->IgnoreParenImpCasts();
994     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
995       Base = TempASE->getBase()->IgnoreParenImpCasts();
996     DE = cast<DeclRefExpr>(Base);
997     OrigVD = cast<VarDecl>(DE->getDecl());
998   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
999     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1000     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1001       Base = TempASE->getBase()->IgnoreParenImpCasts();
1002     DE = cast<DeclRefExpr>(Base);
1003     OrigVD = cast<VarDecl>(DE->getDecl());
1004   }
1005   return OrigVD;
1006 }
1007 
1008 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1009                                                Address PrivateAddr) {
1010   const DeclRefExpr *DE;
1011   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1012     BaseDecls.emplace_back(OrigVD);
1013     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1014     LValue BaseLValue =
1015         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1016                     OriginalBaseLValue);
1017     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1018     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1019         BaseLValue.getPointer(CGF), SharedAddr.getPointer());
1020     llvm::Value *PrivatePointer =
1021         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1022             PrivateAddr.getPointer(), SharedAddr.getType());
1023     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1024         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1025     return castToBase(CGF, OrigVD->getType(),
1026                       SharedAddresses[N].first.getType(),
1027                       OriginalBaseLValue.getAddress(CGF).getType(),
1028                       OriginalBaseLValue.getAlignment(), Ptr);
1029   }
1030   BaseDecls.emplace_back(
1031       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1032   return PrivateAddr;
1033 }
1034 
1035 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1036   const OMPDeclareReductionDecl *DRD =
1037       getReductionInit(ClausesData[N].ReductionOp);
1038   return DRD && DRD->getInitializer();
1039 }
1040 
1041 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1042   return CGF.EmitLoadOfPointerLValue(
1043       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1044       getThreadIDVariable()->getType()->castAs<PointerType>());
1045 }
1046 
1047 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1048   if (!CGF.HaveInsertPoint())
1049     return;
1050   // 1.2.2 OpenMP Language Terminology
1051   // Structured block - An executable statement with a single entry at the
1052   // top and a single exit at the bottom.
1053   // The point of exit cannot be a branch out of the structured block.
1054   // longjmp() and throw() must not violate the entry/exit criteria.
1055   CGF.EHStack.pushTerminate();
1056   if (S)
1057     CGF.incrementProfileCounter(S);
1058   CodeGen(CGF);
1059   CGF.EHStack.popTerminate();
1060 }
1061 
1062 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1063     CodeGenFunction &CGF) {
1064   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1065                             getThreadIDVariable()->getType(),
1066                             AlignmentSource::Decl);
1067 }
1068 
1069 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1070                                        QualType FieldTy) {
1071   auto *Field = FieldDecl::Create(
1072       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1073       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1074       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1075   Field->setAccess(AS_public);
1076   DC->addDecl(Field);
1077   return Field;
1078 }
1079 
/// Construct the OpenMP runtime code generator for module \p CGM.
///
/// \param FirstSeparator Separator placed before the first part of a name
///                       built by getName().
/// \param Separator      Separator placed before every subsequent part.
///
/// The OpenMPIRBuilder is bound to the module of \p CGM and initialized, and
/// loadOffloadInfoMetadata() is invoked as the last construction step.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1090 
1091 void CGOpenMPRuntime::clear() {
1092   InternalVars.clear();
1093   // Clean non-target variable declarations possibly used only in debug info.
1094   for (const auto &Data : EmittedNonTargetVariables) {
1095     if (!Data.getValue().pointsToAliveValue())
1096       continue;
1097     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1098     if (!GV)
1099       continue;
1100     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1101       continue;
1102     GV->eraseFromParent();
1103   }
1104 }
1105 
1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1107   SmallString<128> Buffer;
1108   llvm::raw_svector_ostream OS(Buffer);
1109   StringRef Sep = FirstSeparator;
1110   for (StringRef Part : Parts) {
1111     OS << Sep << Part;
1112     Sep = Separator;
1113   }
1114   return std::string(OS.str());
1115 }
1116 
/// Emit the internal helper function implementing either the combiner or the
/// initializer of a user-defined reduction.
///
/// \param Ty                  Type the reduction operates on; both helper
///                            parameters are restrict-qualified pointers to
///                            it.
/// \param CombinerInitializer Expression emitted (result ignored) as the body
///                            of the helper; may be null, in which case no
///                            expression is emitted.
/// \param In                  Variable privatized to the pointee of the
///                            second helper parameter.
/// \param Out                 Variable privatized to the pointee of the first
///                            helper parameter.
/// \param IsCombiner          Selects the "omp_combiner" name; otherwise the
///                            helper is named "omp_initializer" and a
///                            non-trivial initializer of \p Out is emitted
///                            first.
/// \returns The newly created function with internal linkage.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void <helper>(Ty *restrict omp_out_parm, Ty *restrict omp_in_parm);
  // Note that the out parameter comes first in the argument list.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The name is assembled by the runtime's getName() from the kind string and
  // an empty trailing part.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, force the helper to be inlined into its callers.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer helpers first run the non-trivial initializer of the out
  // variable, if it has one.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // The combiner/initializer expression is evaluated for its side effects.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1173 
1174 void CGOpenMPRuntime::emitUserDefinedReduction(
1175     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1176   if (UDRMap.count(D) > 0)
1177     return;
1178   llvm::Function *Combiner = emitCombinerOrInitializer(
1179       CGM, D->getType(), D->getCombiner(),
1180       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1181       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1182       /*IsCombiner=*/true);
1183   llvm::Function *Initializer = nullptr;
1184   if (const Expr *Init = D->getInitializer()) {
1185     Initializer = emitCombinerOrInitializer(
1186         CGM, D->getType(),
1187         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1188                                                                      : nullptr,
1189         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1190         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1191         /*IsCombiner=*/false);
1192   }
1193   UDRMap.try_emplace(D, Combiner, Initializer);
1194   if (CGF) {
1195     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1196     Decls.second.push_back(D);
1197   }
1198 }
1199 
1200 std::pair<llvm::Function *, llvm::Function *>
1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1202   auto I = UDRMap.find(D);
1203   if (I != UDRMap.end())
1204     return I->second;
1205   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1206   return UDRMap.lookup(D);
1207 }
1208 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// The constructor pushes a finalization callback onto the builder and the
// destructor pops it again. With a null builder both are no-ops.
// NOTE(review): the callback captures CGF by reference, so the CodeGenFunction
// presumably has to outlive this object - confirm at the use sites.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Temporarily move the builder to IP; the guard restores the previous
      // insertion point when the callback returns.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    // Undo the push performed by the constructor.
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the callback was pushed onto; null if nothing was pushed.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1253 
1254 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1255     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1256     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1257     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1258   assert(ThreadIDVar->getType()->isPointerType() &&
1259          "thread id variable must be of type kmp_int32 *");
1260   CodeGenFunction CGF(CGM, true);
1261   bool HasCancel = false;
1262   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1263     HasCancel = OPD->hasCancel();
1264   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1265     HasCancel = OPD->hasCancel();
1266   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1267     HasCancel = OPSD->hasCancel();
1268   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1269     HasCancel = OPFD->hasCancel();
1270   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1273     HasCancel = OPFD->hasCancel();
1274   else if (const auto *OPFD =
1275                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1276     HasCancel = OPFD->hasCancel();
1277   else if (const auto *OPFD =
1278                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1279     HasCancel = OPFD->hasCancel();
1280 
1281   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1282   //       parallel region to make cancellation barriers work properly.
1283   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1284   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1285   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1286                                     HasCancel, OutlinedHelperName);
1287   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1288   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1289 }
1290 
1291 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1292     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1294   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1295   return emitParallelOrTeamsOutlinedFunction(
1296       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1297 }
1298 
1299 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1300     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1301     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1302   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1303   return emitParallelOrTeamsOutlinedFunction(
1304       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1305 }
1306 
1307 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1308     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1309     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1310     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1311     bool Tied, unsigned &NumberOfParts) {
1312   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1313                                               PrePostActionTy &) {
1314     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1315     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1316     llvm::Value *TaskArgs[] = {
1317         UpLoc, ThreadID,
1318         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1319                                     TaskTVar->getType()->castAs<PointerType>())
1320             .getPointer(CGF)};
1321     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1322                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1323                         TaskArgs);
1324   };
1325   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1326                                                             UntiedCodeGen);
1327   CodeGen.setAction(Action);
1328   assert(!ThreadIDVar->getType()->isPointerType() &&
1329          "thread id variable must be of type kmp_int32 for tasks");
1330   const OpenMPDirectiveKind Region =
1331       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1332                                                       : OMPD_task;
1333   const CapturedStmt *CS = D.getCapturedStmt(Region);
1334   bool HasCancel = false;
1335   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1336     HasCancel = TD->hasCancel();
1337   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1338     HasCancel = TD->hasCancel();
1339   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1340     HasCancel = TD->hasCancel();
1341   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1342     HasCancel = TD->hasCancel();
1343 
1344   CodeGenFunction CGF(CGM, true);
1345   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1346                                         InnermostKind, HasCancel, Action);
1347   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1348   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1349   if (!Tied)
1350     NumberOfParts = Action.getNumberOfParts();
1351   return Res;
1352 }
1353 
1354 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1355                              const RecordDecl *RD, const CGRecordLayout &RL,
1356                              ArrayRef<llvm::Constant *> Data) {
1357   llvm::StructType *StructTy = RL.getLLVMType();
1358   unsigned PrevIdx = 0;
1359   ConstantInitBuilder CIBuilder(CGM);
1360   auto DI = Data.begin();
1361   for (const FieldDecl *FD : RD->fields()) {
1362     unsigned Idx = RL.getLLVMFieldNo(FD);
1363     // Fill the alignment.
1364     for (unsigned I = PrevIdx; I < Idx; ++I)
1365       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1366     PrevIdx = Idx + 1;
1367     Fields.add(*DI);
1368     ++DI;
1369   }
1370 }
1371 
1372 template <class... As>
1373 static llvm::GlobalVariable *
1374 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1375                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1376                    As &&... Args) {
1377   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1378   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1379   ConstantInitBuilder CIBuilder(CGM);
1380   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1381   buildStructValue(Fields, CGM, RD, RL, Data);
1382   return Fields.finishAndCreateGlobal(
1383       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1384       std::forward<As>(Args)...);
1385 }
1386 
1387 template <typename T>
1388 static void
1389 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1390                                          ArrayRef<llvm::Constant *> Data,
1391                                          T &Parent) {
1392   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1393   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1394   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1395   buildStructValue(Fields, CGM, RD, RL, Data);
1396   Fields.finishAndAddTo(Parent);
1397 }
1398 
1399 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1400                                              bool AtCurrentPoint) {
1401   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1402   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1403 
1404   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1405   if (AtCurrentPoint) {
1406     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1407         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1408   } else {
1409     Elem.second.ServiceInsertPt =
1410         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1411     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1412   }
1413 }
1414 
1415 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1416   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1417   if (Elem.second.ServiceInsertPt) {
1418     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1419     Elem.second.ServiceInsertPt = nullptr;
1420     Ptr->eraseFromParent();
1421   }
1422 }
1423 
1424 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1425                                                   SourceLocation Loc,
1426                                                   SmallString<128> &Buffer) {
1427   llvm::raw_svector_ostream OS(Buffer);
1428   // Build debug location
1429   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1430   OS << ";" << PLoc.getFilename() << ";";
1431   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1432     OS << FD->getQualifiedNameAsString();
1433   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1434   return OS.str();
1435 }
1436 
1437 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1438                                                  SourceLocation Loc,
1439                                                  unsigned Flags) {
1440   llvm::Constant *SrcLocStr;
1441   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1442       Loc.isInvalid()) {
1443     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1444   } else {
1445     std::string FunctionName = "";
1446     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1447       FunctionName = FD->getQualifiedNameAsString();
1448     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1449     const char *FileName = PLoc.getFilename();
1450     unsigned Line = PLoc.getLine();
1451     unsigned Column = PLoc.getColumn();
1452     SrcLocStr =
1453         OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1454   }
1455   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1456   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1457                                      Reserved2Flags);
1458 }
1459 
/// Returns a value holding the OpenMP global thread id for the function that
/// \p CGF is currently emitting.
///
/// The lookup proceeds in this order:
///  1. If the OpenMPIRBuilder language option is set, the request is forwarded
///     to OMPBuilder.
///  2. If a thread id is already cached for the current function, it is
///     returned.
///  3. If the current captured-statement info is an OpenMP region with a
///     thread id variable, the id is loaded from that variable (subject to the
///     exception-handling conditions below).
///  4. Otherwise a call to __kmpc_global_thread_num is emitted at the service
///     insert point and cached for the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable when any of these holds: no landing
      // pad is required, (C++) exceptions are disabled, we are emitting into
      // the entry block, the pointer is not an instruction, or the pointer is
      // defined in the entry block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard restores the
  // builder's previous insert point when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1527 
1528 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1529   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1530   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1531     clearLocThreadIdInsertPt(CGF);
1532     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1533   }
1534   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1535     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1536       UDRMap.erase(D);
1537     FunctionUDRMap.erase(CGF.CurFn);
1538   }
1539   auto I = FunctionUDMMap.find(CGF.CurFn);
1540   if (I != FunctionUDMMap.end()) {
1541     for(const auto *D : I->second)
1542       UDMMap.erase(D);
1543     FunctionUDMMap.erase(I);
1544   }
1545   LastprivateConditionalToTypes.erase(CGF.CurFn);
1546   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1547 }
1548 
1549 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1550   return OMPBuilder.IdentPtr;
1551 }
1552 
1553 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1554   if (!Kmpc_MicroTy) {
1555     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1556     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1557                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1558     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1559   }
1560   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1561 }
1562 
1563 llvm::FunctionCallee
1564 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1565                                              bool IsGPUDistribute) {
1566   assert((IVSize == 32 || IVSize == 64) &&
1567          "IV size is not compatible with the omp runtime");
1568   StringRef Name;
1569   if (IsGPUDistribute)
1570     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1571                                     : "__kmpc_distribute_static_init_4u")
1572                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1573                                     : "__kmpc_distribute_static_init_8u");
1574   else
1575     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1576                                     : "__kmpc_for_static_init_4u")
1577                         : (IVSigned ? "__kmpc_for_static_init_8"
1578                                     : "__kmpc_for_static_init_8u");
1579 
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582   llvm::Type *TypeParams[] = {
1583     getIdentTyPointerTy(),                     // loc
1584     CGM.Int32Ty,                               // tid
1585     CGM.Int32Ty,                               // schedtype
1586     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1587     PtrTy,                                     // p_lower
1588     PtrTy,                                     // p_upper
1589     PtrTy,                                     // p_stride
1590     ITy,                                       // incr
1591     ITy                                        // chunk
1592   };
1593   auto *FnTy =
1594       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1595   return CGM.CreateRuntimeFunction(FnTy, Name);
1596 }
1597 
1598 llvm::FunctionCallee
1599 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1600   assert((IVSize == 32 || IVSize == 64) &&
1601          "IV size is not compatible with the omp runtime");
1602   StringRef Name =
1603       IVSize == 32
1604           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1605           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1606   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1607   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1608                                CGM.Int32Ty,           // tid
1609                                CGM.Int32Ty,           // schedtype
1610                                ITy,                   // lower
1611                                ITy,                   // upper
1612                                ITy,                   // stride
1613                                ITy                    // chunk
1614   };
1615   auto *FnTy =
1616       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1617   return CGM.CreateRuntimeFunction(FnTy, Name);
1618 }
1619 
1620 llvm::FunctionCallee
1621 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1622   assert((IVSize == 32 || IVSize == 64) &&
1623          "IV size is not compatible with the omp runtime");
1624   StringRef Name =
1625       IVSize == 32
1626           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1627           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1628   llvm::Type *TypeParams[] = {
1629       getIdentTyPointerTy(), // loc
1630       CGM.Int32Ty,           // tid
1631   };
1632   auto *FnTy =
1633       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1634   return CGM.CreateRuntimeFunction(FnTy, Name);
1635 }
1636 
1637 llvm::FunctionCallee
1638 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1639   assert((IVSize == 32 || IVSize == 64) &&
1640          "IV size is not compatible with the omp runtime");
1641   StringRef Name =
1642       IVSize == 32
1643           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1644           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1645   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1646   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1647   llvm::Type *TypeParams[] = {
1648     getIdentTyPointerTy(),                     // loc
1649     CGM.Int32Ty,                               // tid
1650     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1651     PtrTy,                                     // p_lower
1652     PtrTy,                                     // p_upper
1653     PtrTy                                      // p_stride
1654   };
1655   auto *FnTy =
1656       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1657   return CGM.CreateRuntimeFunction(FnTy, Name);
1658 }
1659 
1660 /// Obtain information that uniquely identifies a target entry. This
1661 /// consists of the file and device IDs as well as line number associated with
1662 /// the relevant entry source location.
1663 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1664                                      unsigned &DeviceID, unsigned &FileID,
1665                                      unsigned &LineNum) {
1666   SourceManager &SM = C.getSourceManager();
1667 
1668   // The loc should be always valid and have a file ID (the user cannot use
1669   // #pragma directives in macros)
1670 
1671   assert(Loc.isValid() && "Source location is expected to be always valid.");
1672 
1673   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1674   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1675 
1676   llvm::sys::fs::UniqueID ID;
1677   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1678     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1679     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1680     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1681       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1682           << PLoc.getFilename() << EC.message();
1683   }
1684 
1685   DeviceID = ID.getDevice();
1686   FileID = ID.getFile();
1687   LineNum = PLoc.getLine();
1688 }
1689 
1690 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1691   if (CGM.getLangOpts().OpenMPSimd)
1692     return Address::invalid();
1693   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1694       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1695   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1696               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1697                HasRequiresUnifiedSharedMemory))) {
1698     SmallString<64> PtrName;
1699     {
1700       llvm::raw_svector_ostream OS(PtrName);
1701       OS << CGM.getMangledName(GlobalDecl(VD));
1702       if (!VD->isExternallyVisible()) {
1703         unsigned DeviceID, FileID, Line;
1704         getTargetEntryUniqueInfo(CGM.getContext(),
1705                                  VD->getCanonicalDecl()->getBeginLoc(),
1706                                  DeviceID, FileID, Line);
1707         OS << llvm::format("_%x", FileID);
1708       }
1709       OS << "_decl_tgt_ref_ptr";
1710     }
1711     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1712     if (!Ptr) {
1713       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1714       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1715                                         PtrName);
1716 
1717       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1718       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1719 
1720       if (!CGM.getLangOpts().OpenMPIsDevice)
1721         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1722       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1723     }
1724     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1725   }
1726   return Address::invalid();
1727 }
1728 
1729 llvm::Constant *
1730 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1731   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1732          !CGM.getContext().getTargetInfo().isTLSSupported());
1733   // Lookup the entry, lazily creating it if necessary.
1734   std::string Suffix = getName({"cache", ""});
1735   return getOrCreateInternalVariable(
1736       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1737 }
1738 
1739 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1740                                                 const VarDecl *VD,
1741                                                 Address VDAddr,
1742                                                 SourceLocation Loc) {
1743   if (CGM.getLangOpts().OpenMPUseTLS &&
1744       CGM.getContext().getTargetInfo().isTLSSupported())
1745     return VDAddr;
1746 
1747   llvm::Type *VarTy = VDAddr.getElementType();
1748   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1749                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1750                                                        CGM.Int8PtrTy),
1751                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1752                          getOrCreateThreadPrivateCache(VD)};
1753   return Address(CGF.EmitRuntimeCall(
1754                      OMPBuilder.getOrCreateRuntimeFunction(
1755                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1756                      Args),
1757                  VDAddr.getAlignment());
1758 }
1759 
1760 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1761     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1762     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1763   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1764   // library.
1765   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1766   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1767                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1768                       OMPLoc);
1769   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1770   // to register constructor/destructor for variable.
1771   llvm::Value *Args[] = {
1772       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1773       Ctor, CopyCtor, Dtor};
1774   CGF.EmitRuntimeCall(
1775       OMPBuilder.getOrCreateRuntimeFunction(
1776           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1777       Args);
1778 }
1779 
/// Emits the helper functions needed to construct and destroy per-thread
/// copies of the threadprivate variable \p VD and registers them with the
/// runtime.
///
/// \param VD          The threadprivate variable; its definition is looked up
///                    and each definition is processed at most once.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit If true (and compiling C++), a constructor helper that
///                    re-emits the initializer is generated.
/// \param CGF         If non-null, the registration calls are emitted into
///                    this function; if null, a standalone init function is
///                    created instead.
/// \returns The newly created init function when \p CGF is null and a
///          constructor or destructor helper was needed; nullptr otherwise
///          (including when TLS is requested and supported).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to do when threadprivate variables are implemented with TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Only proceed for the first request for this definition (keyed by mangled
  // name).
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit implies that an initializer exists - confirm
    // against callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and initialize the object it
      // points to.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of the two helpers was not generated is passed as a typed
    // null function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // Without a caller-provided function, wrap the registration calls in a
    // dedicated init function and hand that back to the caller.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1899 
/// Emits and registers the constructor/destructor offload entries for the
/// declare target variable \p VD whose global is \p Addr.
///
/// When compiling for the device, real "_ctor"/"_dtor" functions operating on
/// \p Addr are generated; otherwise private placeholder globals with the same
/// names are created. In both cases the entry is registered with the offload
/// entries manager.
///
/// \param VD          The declare target variable.
/// \param Addr        The global variable holding \p VD.
/// \param PerformInit If true (and compiling C++), a constructor entry is
///                    produced.
/// \returns false if there are no offload target triples and this is not a
///          device compilation; otherwise the value of the OpenMPIsDevice
///          language option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // Variables that are not declare target, are 'link', or are 'to' under
  // unified shared memory get no ctor/dtor entries here.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each definition only once (keyed by mangled name).
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // The prefix has the form
    // "__omp_offloading__<device-id>_<file-id>_<name>_l<line>" with the IDs
    // printed in hexadecimal.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  // NOTE(review): Init is dereferenced in the device branch without a null
  // check; presumably PerformInit implies that an initializer exists -
  // confirm against callers.
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the global variable at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Not a device compilation: create a private, zero-initialized i8
      // constant with the constructor's name; it serves as both the entry
      // address and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the global variable
      // at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Not a device compilation: create a private, zero-initialized i8
      // constant with the destructor's name; it serves as both the entry
      // address and its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2014 
2015 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2016                                                           QualType VarType,
2017                                                           StringRef Name) {
2018   std::string Suffix = getName({"artificial", ""});
2019   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2020   llvm::GlobalVariable *GAddr =
2021       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2022   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2023       CGM.getTarget().isTLSSupported()) {
2024     GAddr->setThreadLocal(/*Val=*/true);
2025     return Address(GAddr, GAddr->getValueType(),
2026                    CGM.getContext().getTypeAlignInChars(VarType));
2027   }
2028   std::string CacheSuffix = getName({"cache", ""});
2029   llvm::Value *Args[] = {
2030       emitUpdateLocation(CGF, SourceLocation()),
2031       getThreadID(CGF, SourceLocation()),
2032       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2033       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2034                                 /*isSigned=*/false),
2035       getOrCreateInternalVariable(
2036           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2037   return Address(
2038       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2039           CGF.EmitRuntimeCall(
2040               OMPBuilder.getOrCreateRuntimeFunction(
2041                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2042               Args),
2043           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2044       CGM.getContext().getTypeAlignInChars(VarType));
2045 }
2046 
2047 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2048                                    const RegionCodeGenTy &ThenGen,
2049                                    const RegionCodeGenTy &ElseGen) {
2050   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2051 
2052   // If the condition constant folds and can be elided, try to avoid emitting
2053   // the condition and the dead arm of the if/else.
2054   bool CondConstant;
2055   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2056     if (CondConstant)
2057       ThenGen(CGF);
2058     else
2059       ElseGen(CGF);
2060     return;
2061   }
2062 
2063   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2064   // emit the conditional branch.
2065   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2066   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2067   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2068   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2069 
2070   // Emit the 'then' code.
2071   CGF.EmitBlock(ThenBlock);
2072   ThenGen(CGF);
2073   CGF.EmitBranch(ContBlock);
2074   // Emit the 'else' code if present.
2075   // There is no need to emit line number for unconditional branch.
2076   (void)ApplyDebugLocation::CreateEmpty(CGF);
2077   CGF.EmitBlock(ElseBlock);
2078   ElseGen(CGF);
2079   // There is no need to emit line number for unconditional branch.
2080   (void)ApplyDebugLocation::CreateEmpty(CGF);
2081   CGF.EmitBranch(ContBlock);
2082   // Emit the continuation block for code after the if.
2083   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2084 }
2085 
2086 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2087                                        llvm::Function *OutlinedFn,
2088                                        ArrayRef<llvm::Value *> CapturedVars,
2089                                        const Expr *IfCond,
2090                                        llvm::Value *NumThreads) {
2091   if (!CGF.HaveInsertPoint())
2092     return;
2093   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2094   auto &M = CGM.getModule();
2095   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2096                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2097     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2098     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2099     llvm::Value *Args[] = {
2100         RTLoc,
2101         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2102         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2103     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2104     RealArgs.append(std::begin(Args), std::end(Args));
2105     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2106 
2107     llvm::FunctionCallee RTLFn =
2108         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2109     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2110   };
2111   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2112                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2113     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2114     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2115     // Build calls:
2116     // __kmpc_serialized_parallel(&Loc, GTid);
2117     llvm::Value *Args[] = {RTLoc, ThreadID};
2118     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2119                             M, OMPRTL___kmpc_serialized_parallel),
2120                         Args);
2121 
2122     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2123     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2124     Address ZeroAddrBound =
2125         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2126                                          /*Name=*/".bound.zero.addr");
2127     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2128     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2129     // ThreadId for serialized parallels is 0.
2130     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2131     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2132     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2133 
2134     // Ensure we do not inline the function. This is trivially true for the ones
2135     // passed to __kmpc_fork_call but the ones called in serialized regions
2136     // could be inlined. This is not a perfect but it is closer to the invariant
2137     // we want, namely, every data environment starts with a new function.
2138     // TODO: We should pass the if condition to the runtime function and do the
2139     //       handling there. Much cleaner code.
2140     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2141     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2142     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2143 
2144     // __kmpc_end_serialized_parallel(&Loc, GTid);
2145     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2146     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2147                             M, OMPRTL___kmpc_end_serialized_parallel),
2148                         EndArgs);
2149   };
2150   if (IfCond) {
2151     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2152   } else {
2153     RegionCodeGenTy ThenRCG(ThenGen);
2154     ThenRCG(CGF);
2155   }
2156 }
2157 
2158 // If we're inside an (outlined) parallel region, use the region info's
2159 // thread-ID variable (it is passed in a first argument of the outlined function
2160 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2161 // regular serial code region, get thread ID by calling kmp_int32
2162 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2163 // return the address of that temp.
2164 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2165                                              SourceLocation Loc) {
2166   if (auto *OMPRegionInfo =
2167           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2168     if (OMPRegionInfo->getThreadIDVariable())
2169       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2170 
2171   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2172   QualType Int32Ty =
2173       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2174   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2175   CGF.EmitStoreOfScalar(ThreadID,
2176                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2177 
2178   return ThreadIDTemp;
2179 }
2180 
2181 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2182     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2183   SmallString<256> Buffer;
2184   llvm::raw_svector_ostream Out(Buffer);
2185   Out << Name;
2186   StringRef RuntimeName = Out.str();
2187   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2188   if (Elem.second) {
2189     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2190            "OMP internal variable has different type than requested");
2191     return &*Elem.second;
2192   }
2193 
2194   return Elem.second = new llvm::GlobalVariable(
2195              CGM.getModule(), Ty, /*IsConstant*/ false,
2196              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2197              Elem.first(), /*InsertBefore=*/nullptr,
2198              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2199 }
2200 
2201 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2202   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2203   std::string Name = getName({Prefix, "var"});
2204   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2205 }
2206 
2207 namespace {
2208 /// Common pre(post)-action for different OpenMP constructs.
2209 class CommonActionTy final : public PrePostActionTy {
2210   llvm::FunctionCallee EnterCallee;
2211   ArrayRef<llvm::Value *> EnterArgs;
2212   llvm::FunctionCallee ExitCallee;
2213   ArrayRef<llvm::Value *> ExitArgs;
2214   bool Conditional;
2215   llvm::BasicBlock *ContBlock = nullptr;
2216 
2217 public:
2218   CommonActionTy(llvm::FunctionCallee EnterCallee,
2219                  ArrayRef<llvm::Value *> EnterArgs,
2220                  llvm::FunctionCallee ExitCallee,
2221                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2222       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2223         ExitArgs(ExitArgs), Conditional(Conditional) {}
2224   void Enter(CodeGenFunction &CGF) override {
2225     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2226     if (Conditional) {
2227       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2228       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2229       ContBlock = CGF.createBasicBlock("omp_if.end");
2230       // Generate the branch (If-stmt)
2231       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2232       CGF.EmitBlock(ThenBlock);
2233     }
2234   }
2235   void Done(CodeGenFunction &CGF) {
2236     // Emit the rest of blocks/branches
2237     CGF.EmitBranch(ContBlock);
2238     CGF.EmitBlock(ContBlock, true);
2239   }
2240   void Exit(CodeGenFunction &CGF) override {
2241     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2242   }
2243 };
2244 } // anonymous namespace
2245 
2246 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2247                                          StringRef CriticalName,
2248                                          const RegionCodeGenTy &CriticalOpGen,
2249                                          SourceLocation Loc, const Expr *Hint) {
2250   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2251   // CriticalOpGen();
2252   // __kmpc_end_critical(ident_t *, gtid, Lock);
2253   // Prepare arguments and build a call to __kmpc_critical
2254   if (!CGF.HaveInsertPoint())
2255     return;
2256   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2257                          getCriticalRegionLock(CriticalName)};
2258   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2259                                                 std::end(Args));
2260   if (Hint) {
2261     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2262         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2263   }
2264   CommonActionTy Action(
2265       OMPBuilder.getOrCreateRuntimeFunction(
2266           CGM.getModule(),
2267           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2268       EnterArgs,
2269       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2270                                             OMPRTL___kmpc_end_critical),
2271       Args);
2272   CriticalOpGen.setAction(Action);
2273   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2274 }
2275 
2276 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2277                                        const RegionCodeGenTy &MasterOpGen,
2278                                        SourceLocation Loc) {
2279   if (!CGF.HaveInsertPoint())
2280     return;
2281   // if(__kmpc_master(ident_t *, gtid)) {
2282   //   MasterOpGen();
2283   //   __kmpc_end_master(ident_t *, gtid);
2284   // }
2285   // Prepare arguments and build a call to __kmpc_master
2286   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2287   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_master),
2289                         Args,
2290                         OMPBuilder.getOrCreateRuntimeFunction(
2291                             CGM.getModule(), OMPRTL___kmpc_end_master),
2292                         Args,
2293                         /*Conditional=*/true);
2294   MasterOpGen.setAction(Action);
2295   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2296   Action.Done(CGF);
2297 }
2298 
2299 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2300                                        const RegionCodeGenTy &MaskedOpGen,
2301                                        SourceLocation Loc, const Expr *Filter) {
2302   if (!CGF.HaveInsertPoint())
2303     return;
2304   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2305   //   MaskedOpGen();
2306   //   __kmpc_end_masked(iden_t *, gtid);
2307   // }
2308   // Prepare arguments and build a call to __kmpc_masked
2309   llvm::Value *FilterVal = Filter
2310                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2311                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2312   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2313                          FilterVal};
2314   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2315                             getThreadID(CGF, Loc)};
2316   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2317                             CGM.getModule(), OMPRTL___kmpc_masked),
2318                         Args,
2319                         OMPBuilder.getOrCreateRuntimeFunction(
2320                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2321                         ArgsEnd,
2322                         /*Conditional=*/true);
2323   MaskedOpGen.setAction(Action);
2324   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2325   Action.Done(CGF);
2326 }
2327 
2328 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2329                                         SourceLocation Loc) {
2330   if (!CGF.HaveInsertPoint())
2331     return;
2332   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2333     OMPBuilder.createTaskyield(CGF.Builder);
2334   } else {
2335     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2336     llvm::Value *Args[] = {
2337         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2338         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2339     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2340                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2341                         Args);
2342   }
2343 
2344   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2345     Region->emitUntiedSwitch(CGF);
2346 }
2347 
2348 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2349                                           const RegionCodeGenTy &TaskgroupOpGen,
2350                                           SourceLocation Loc) {
2351   if (!CGF.HaveInsertPoint())
2352     return;
2353   // __kmpc_taskgroup(ident_t *, gtid);
2354   // TaskgroupOpGen();
2355   // __kmpc_end_taskgroup(ident_t *, gtid);
2356   // Prepare arguments and build a call to __kmpc_taskgroup
2357   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2358   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2359                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2360                         Args,
2361                         OMPBuilder.getOrCreateRuntimeFunction(
2362                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2363                         Args);
2364   TaskgroupOpGen.setAction(Action);
2365   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2366 }
2367 
2368 /// Given an array of pointers to variables, project the address of a
2369 /// given variable.
2370 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2371                                       unsigned Index, const VarDecl *Var) {
2372   // Pull out the pointer to the variable.
2373   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2374   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2375 
2376   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2377   Addr = CGF.Builder.CreateElementBitCast(
2378       Addr, CGF.ConvertTypeForMem(Var->getType()));
2379   return Addr;
2380 }
2381 
2382 static llvm::Value *emitCopyprivateCopyFunction(
2383     CodeGenModule &CGM, llvm::Type *ArgsType,
2384     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2385     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2386     SourceLocation Loc) {
2387   ASTContext &C = CGM.getContext();
2388   // void copy_func(void *LHSArg, void *RHSArg);
2389   FunctionArgList Args;
2390   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2391                            ImplicitParamDecl::Other);
2392   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2393                            ImplicitParamDecl::Other);
2394   Args.push_back(&LHSArg);
2395   Args.push_back(&RHSArg);
2396   const auto &CGFI =
2397       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2398   std::string Name =
2399       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2400   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2401                                     llvm::GlobalValue::InternalLinkage, Name,
2402                                     &CGM.getModule());
2403   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2404   Fn->setDoesNotRecurse();
2405   CodeGenFunction CGF(CGM);
2406   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2407   // Dest = (void*[n])(LHSArg);
2408   // Src = (void*[n])(RHSArg);
2409   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2410       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2411       ArgsType), CGF.getPointerAlign());
2412   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2413       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2414       ArgsType), CGF.getPointerAlign());
2415   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2416   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2417   // ...
2418   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2419   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2420     const auto *DestVar =
2421         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2422     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2423 
2424     const auto *SrcVar =
2425         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2426     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2427 
2428     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2429     QualType Type = VD->getType();
2430     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2431   }
2432   CGF.FinishFunction();
2433   return Fn;
2434 }
2435 
2436 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2437                                        const RegionCodeGenTy &SingleOpGen,
2438                                        SourceLocation Loc,
2439                                        ArrayRef<const Expr *> CopyprivateVars,
2440                                        ArrayRef<const Expr *> SrcExprs,
2441                                        ArrayRef<const Expr *> DstExprs,
2442                                        ArrayRef<const Expr *> AssignmentOps) {
2443   if (!CGF.HaveInsertPoint())
2444     return;
2445   assert(CopyprivateVars.size() == SrcExprs.size() &&
2446          CopyprivateVars.size() == DstExprs.size() &&
2447          CopyprivateVars.size() == AssignmentOps.size());
2448   ASTContext &C = CGM.getContext();
2449   // int32 did_it = 0;
2450   // if(__kmpc_single(ident_t *, gtid)) {
2451   //   SingleOpGen();
2452   //   __kmpc_end_single(ident_t *, gtid);
2453   //   did_it = 1;
2454   // }
2455   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2456   // <copy_func>, did_it);
2457 
2458   Address DidIt = Address::invalid();
2459   if (!CopyprivateVars.empty()) {
2460     // int32 did_it = 0;
2461     QualType KmpInt32Ty =
2462         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2463     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2464     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2465   }
2466   // Prepare arguments and build a call to __kmpc_single
2467   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2468   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2469                             CGM.getModule(), OMPRTL___kmpc_single),
2470                         Args,
2471                         OMPBuilder.getOrCreateRuntimeFunction(
2472                             CGM.getModule(), OMPRTL___kmpc_end_single),
2473                         Args,
2474                         /*Conditional=*/true);
2475   SingleOpGen.setAction(Action);
2476   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2477   if (DidIt.isValid()) {
2478     // did_it = 1;
2479     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2480   }
2481   Action.Done(CGF);
2482   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2483   // <copy_func>, did_it);
2484   if (DidIt.isValid()) {
2485     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2486     QualType CopyprivateArrayTy = C.getConstantArrayType(
2487         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2488         /*IndexTypeQuals=*/0);
2489     // Create a list of all private variables for copyprivate.
2490     Address CopyprivateList =
2491         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2492     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2493       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2494       CGF.Builder.CreateStore(
2495           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2496               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2497               CGF.VoidPtrTy),
2498           Elem);
2499     }
2500     // Build function that copies private values from single region to all other
2501     // threads in the corresponding parallel region.
2502     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2503         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2504         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2505     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2506     Address CL =
2507       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2508                                                       CGF.VoidPtrTy);
2509     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2510     llvm::Value *Args[] = {
2511         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2512         getThreadID(CGF, Loc),        // i32 <gtid>
2513         BufSize,                      // size_t <buf_size>
2514         CL.getPointer(),              // void *<copyprivate list>
2515         CpyFn,                        // void (*) (void *, void *) <copy_func>
2516         DidItVal                      // i32 did_it
2517     };
2518     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2519                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2520                         Args);
2521   }
2522 }
2523 
2524 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2525                                         const RegionCodeGenTy &OrderedOpGen,
2526                                         SourceLocation Loc, bool IsThreads) {
2527   if (!CGF.HaveInsertPoint())
2528     return;
2529   // __kmpc_ordered(ident_t *, gtid);
2530   // OrderedOpGen();
2531   // __kmpc_end_ordered(ident_t *, gtid);
2532   // Prepare arguments and build a call to __kmpc_ordered
2533   if (IsThreads) {
2534     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2535     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2536                               CGM.getModule(), OMPRTL___kmpc_ordered),
2537                           Args,
2538                           OMPBuilder.getOrCreateRuntimeFunction(
2539                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2540                           Args);
2541     OrderedOpGen.setAction(Action);
2542     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543     return;
2544   }
2545   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2546 }
2547 
2548 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2549   unsigned Flags;
2550   if (Kind == OMPD_for)
2551     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2552   else if (Kind == OMPD_sections)
2553     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2554   else if (Kind == OMPD_single)
2555     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2556   else if (Kind == OMPD_barrier)
2557     Flags = OMP_IDENT_BARRIER_EXPL;
2558   else
2559     Flags = OMP_IDENT_BARRIER_IMPL;
2560   return Flags;
2561 }
2562 
2563 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2564     CodeGenFunction &CGF, const OMPLoopDirective &S,
2565     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2566   // Check if the loop directive is actually a doacross loop directive. In this
2567   // case choose static, 1 schedule.
2568   if (llvm::any_of(
2569           S.getClausesOfKind<OMPOrderedClause>(),
2570           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2571     ScheduleKind = OMPC_SCHEDULE_static;
2572     // Chunk size is 1 in this case.
2573     llvm::APInt ChunkSize(32, 1);
2574     ChunkExpr = IntegerLiteral::Create(
2575         CGF.getContext(), ChunkSize,
2576         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2577         SourceLocation());
2578   }
2579 }
2580 
2581 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2582                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2583                                       bool ForceSimpleCall) {
2584   // Check if we should use the OMPBuilder
2585   auto *OMPRegionInfo =
2586       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2587   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2588     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2589         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2590     return;
2591   }
2592 
2593   if (!CGF.HaveInsertPoint())
2594     return;
2595   // Build call __kmpc_cancel_barrier(loc, thread_id);
2596   // Build call __kmpc_barrier(loc, thread_id);
2597   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2598   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2599   // thread_id);
2600   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2601                          getThreadID(CGF, Loc)};
2602   if (OMPRegionInfo) {
2603     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2604       llvm::Value *Result = CGF.EmitRuntimeCall(
2605           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2606                                                 OMPRTL___kmpc_cancel_barrier),
2607           Args);
2608       if (EmitChecks) {
2609         // if (__kmpc_cancel_barrier()) {
2610         //   exit from construct;
2611         // }
2612         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2613         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2614         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2615         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2616         CGF.EmitBlock(ExitBB);
2617         //   exit from construct;
2618         CodeGenFunction::JumpDest CancelDestination =
2619             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2620         CGF.EmitBranchThroughCleanup(CancelDestination);
2621         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2622       }
2623       return;
2624     }
2625   }
2626   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2627                           CGM.getModule(), OMPRTL___kmpc_barrier),
2628                       Args);
2629 }
2630 
2631 /// Map the OpenMP loop schedule to the runtime enumeration.
2632 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2633                                           bool Chunked, bool Ordered) {
2634   switch (ScheduleKind) {
2635   case OMPC_SCHEDULE_static:
2636     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2637                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2638   case OMPC_SCHEDULE_dynamic:
2639     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2640   case OMPC_SCHEDULE_guided:
2641     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2642   case OMPC_SCHEDULE_runtime:
2643     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2644   case OMPC_SCHEDULE_auto:
2645     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2646   case OMPC_SCHEDULE_unknown:
2647     assert(!Chunked && "chunk was specified but schedule kind not known");
2648     return Ordered ? OMP_ord_static : OMP_sch_static;
2649   }
2650   llvm_unreachable("Unexpected runtime schedule");
2651 }
2652 
2653 /// Map the OpenMP distribute schedule to the runtime enumeration.
2654 static OpenMPSchedType
2655 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2656   // only static is allowed for dist_schedule
2657   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2658 }
2659 
2660 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2661                                          bool Chunked) const {
2662   OpenMPSchedType Schedule =
2663       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2664   return Schedule == OMP_sch_static;
2665 }
2666 
2667 bool CGOpenMPRuntime::isStaticNonchunked(
2668     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2669   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2670   return Schedule == OMP_dist_sch_static;
2671 }
2672 
2673 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2674                                       bool Chunked) const {
2675   OpenMPSchedType Schedule =
2676       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2677   return Schedule == OMP_sch_static_chunked;
2678 }
2679 
2680 bool CGOpenMPRuntime::isStaticChunked(
2681     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2682   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2683   return Schedule == OMP_dist_sch_static_chunked;
2684 }
2685 
2686 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2687   OpenMPSchedType Schedule =
2688       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2689   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2690   return Schedule != OMP_sch_static;
2691 }
2692 
2693 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2694                                   OpenMPScheduleClauseModifier M1,
2695                                   OpenMPScheduleClauseModifier M2) {
2696   int Modifier = 0;
2697   switch (M1) {
2698   case OMPC_SCHEDULE_MODIFIER_monotonic:
2699     Modifier = OMP_sch_modifier_monotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2702     Modifier = OMP_sch_modifier_nonmonotonic;
2703     break;
2704   case OMPC_SCHEDULE_MODIFIER_simd:
2705     if (Schedule == OMP_sch_static_chunked)
2706       Schedule = OMP_sch_static_balanced_chunked;
2707     break;
2708   case OMPC_SCHEDULE_MODIFIER_last:
2709   case OMPC_SCHEDULE_MODIFIER_unknown:
2710     break;
2711   }
2712   switch (M2) {
2713   case OMPC_SCHEDULE_MODIFIER_monotonic:
2714     Modifier = OMP_sch_modifier_monotonic;
2715     break;
2716   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2717     Modifier = OMP_sch_modifier_nonmonotonic;
2718     break;
2719   case OMPC_SCHEDULE_MODIFIER_simd:
2720     if (Schedule == OMP_sch_static_chunked)
2721       Schedule = OMP_sch_static_balanced_chunked;
2722     break;
2723   case OMPC_SCHEDULE_MODIFIER_last:
2724   case OMPC_SCHEDULE_MODIFIER_unknown:
2725     break;
2726   }
2727   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2728   // If the static schedule kind is specified or if the ordered clause is
2729   // specified, and if the nonmonotonic modifier is not specified, the effect is
2730   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2731   // modifier is specified, the effect is as if the nonmonotonic modifier is
2732   // specified.
2733   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2734     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2735           Schedule == OMP_sch_static_balanced_chunked ||
2736           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2737           Schedule == OMP_dist_sch_static_chunked ||
2738           Schedule == OMP_dist_sch_static))
2739       Modifier = OMP_sch_modifier_nonmonotonic;
2740   }
2741   return Schedule | Modifier;
2742 }
2743 
2744 void CGOpenMPRuntime::emitForDispatchInit(
2745     CodeGenFunction &CGF, SourceLocation Loc,
2746     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2747     bool Ordered, const DispatchRTInput &DispatchValues) {
2748   if (!CGF.HaveInsertPoint())
2749     return;
2750   OpenMPSchedType Schedule = getRuntimeSchedule(
2751       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2752   assert(Ordered ||
2753          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2754           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2755           Schedule != OMP_sch_static_balanced_chunked));
2756   // Call __kmpc_dispatch_init(
2757   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2758   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2759   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2760 
2761   // If the Chunk was not specified in the clause - use default value 1.
2762   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2763                                             : CGF.Builder.getIntN(IVSize, 1);
2764   llvm::Value *Args[] = {
2765       emitUpdateLocation(CGF, Loc),
2766       getThreadID(CGF, Loc),
2767       CGF.Builder.getInt32(addMonoNonMonoModifier(
2768           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2769       DispatchValues.LB,                                     // Lower
2770       DispatchValues.UB,                                     // Upper
2771       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2772       Chunk                                                  // Chunk
2773   };
2774   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2775 }
2776 
2777 static void emitForStaticInitCall(
2778     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2779     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2780     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2781     const CGOpenMPRuntime::StaticRTInput &Values) {
2782   if (!CGF.HaveInsertPoint())
2783     return;
2784 
2785   assert(!Values.Ordered);
2786   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2787          Schedule == OMP_sch_static_balanced_chunked ||
2788          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2789          Schedule == OMP_dist_sch_static ||
2790          Schedule == OMP_dist_sch_static_chunked);
2791 
2792   // Call __kmpc_for_static_init(
2793   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2794   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2795   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2796   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2797   llvm::Value *Chunk = Values.Chunk;
2798   if (Chunk == nullptr) {
2799     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2800             Schedule == OMP_dist_sch_static) &&
2801            "expected static non-chunked schedule");
2802     // If the Chunk was not specified in the clause - use default value 1.
2803     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2804   } else {
2805     assert((Schedule == OMP_sch_static_chunked ||
2806             Schedule == OMP_sch_static_balanced_chunked ||
2807             Schedule == OMP_ord_static_chunked ||
2808             Schedule == OMP_dist_sch_static_chunked) &&
2809            "expected static chunked schedule");
2810   }
2811   llvm::Value *Args[] = {
2812       UpdateLocation,
2813       ThreadId,
2814       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2815                                                   M2)), // Schedule type
2816       Values.IL.getPointer(),                           // &isLastIter
2817       Values.LB.getPointer(),                           // &LB
2818       Values.UB.getPointer(),                           // &UB
2819       Values.ST.getPointer(),                           // &Stride
2820       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2821       Chunk                                             // Chunk
2822   };
2823   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2824 }
2825 
2826 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2827                                         SourceLocation Loc,
2828                                         OpenMPDirectiveKind DKind,
2829                                         const OpenMPScheduleTy &ScheduleKind,
2830                                         const StaticRTInput &Values) {
2831   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2832       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2833   assert(isOpenMPWorksharingDirective(DKind) &&
2834          "Expected loop-based or sections-based directive.");
2835   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2836                                              isOpenMPLoopDirective(DKind)
2837                                                  ? OMP_IDENT_WORK_LOOP
2838                                                  : OMP_IDENT_WORK_SECTIONS);
2839   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2840   llvm::FunctionCallee StaticInitFunction =
2841       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2842   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2843   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2844                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2845 }
2846 
2847 void CGOpenMPRuntime::emitDistributeStaticInit(
2848     CodeGenFunction &CGF, SourceLocation Loc,
2849     OpenMPDistScheduleClauseKind SchedKind,
2850     const CGOpenMPRuntime::StaticRTInput &Values) {
2851   OpenMPSchedType ScheduleNum =
2852       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2853   llvm::Value *UpdatedLocation =
2854       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2855   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2856   llvm::FunctionCallee StaticInitFunction;
2857   bool isGPUDistribute =
2858       CGM.getLangOpts().OpenMPIsDevice &&
2859       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2860   StaticInitFunction = createForStaticInitFunction(
2861       Values.IVSize, Values.IVSigned, isGPUDistribute);
2862 
2863   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2864                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2865                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2866 }
2867 
2868 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2869                                           SourceLocation Loc,
2870                                           OpenMPDirectiveKind DKind) {
2871   if (!CGF.HaveInsertPoint())
2872     return;
2873   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2874   llvm::Value *Args[] = {
2875       emitUpdateLocation(CGF, Loc,
2876                          isOpenMPDistributeDirective(DKind)
2877                              ? OMP_IDENT_WORK_DISTRIBUTE
2878                              : isOpenMPLoopDirective(DKind)
2879                                    ? OMP_IDENT_WORK_LOOP
2880                                    : OMP_IDENT_WORK_SECTIONS),
2881       getThreadID(CGF, Loc)};
2882   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2883   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2884       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2885     CGF.EmitRuntimeCall(
2886         OMPBuilder.getOrCreateRuntimeFunction(
2887             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2888         Args);
2889   else
2890     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2891                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2892                         Args);
2893 }
2894 
2895 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2896                                                  SourceLocation Loc,
2897                                                  unsigned IVSize,
2898                                                  bool IVSigned) {
2899   if (!CGF.HaveInsertPoint())
2900     return;
2901   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2902   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2903   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2904 }
2905 
2906 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2907                                           SourceLocation Loc, unsigned IVSize,
2908                                           bool IVSigned, Address IL,
2909                                           Address LB, Address UB,
2910                                           Address ST) {
2911   // Call __kmpc_dispatch_next(
2912   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2913   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2914   //          kmp_int[32|64] *p_stride);
2915   llvm::Value *Args[] = {
2916       emitUpdateLocation(CGF, Loc),
2917       getThreadID(CGF, Loc),
2918       IL.getPointer(), // &isLastIter
2919       LB.getPointer(), // &Lower
2920       UB.getPointer(), // &Upper
2921       ST.getPointer()  // &Stride
2922   };
2923   llvm::Value *Call =
2924       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2925   return CGF.EmitScalarConversion(
2926       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2927       CGF.getContext().BoolTy, Loc);
2928 }
2929 
2930 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2931                                            llvm::Value *NumThreads,
2932                                            SourceLocation Loc) {
2933   if (!CGF.HaveInsertPoint())
2934     return;
2935   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2936   llvm::Value *Args[] = {
2937       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2938       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2939   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2940                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2941                       Args);
2942 }
2943 
2944 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2945                                          ProcBindKind ProcBind,
2946                                          SourceLocation Loc) {
2947   if (!CGF.HaveInsertPoint())
2948     return;
2949   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2950   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2951   llvm::Value *Args[] = {
2952       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2953       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2954   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2955                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2956                       Args);
2957 }
2958 
2959 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2960                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2961   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2962     OMPBuilder.createFlush(CGF.Builder);
2963   } else {
2964     if (!CGF.HaveInsertPoint())
2965       return;
2966     // Build call void __kmpc_flush(ident_t *loc)
2967     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2968                             CGM.getModule(), OMPRTL___kmpc_flush),
2969                         emitUpdateLocation(CGF, Loc));
2970   }
2971 }
2972 
namespace {
/// Field indexes into the kmp_task_t type, in declaration order.
enum KmpTaskTFields {
  /// Shared variables list.
  KmpTaskTShareds = 0,
  /// The task routine.
  KmpTaskTRoutine,
  /// Partition id, used for untied tasks.
  KmpTaskTPartId,
  /// Function that calls the destructors of private variables.
  Data1,
  /// Priority of the task.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// Is-last-iteration flag (taskloops only).
  KmpTaskTLastIter,
  /// Reduction data (taskloops only).
  KmpTaskTReductions,
};
} // anonymous namespace
2998 
2999 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3000   return OffloadEntriesTargetRegion.empty() &&
3001          OffloadEntriesDeviceGlobalVar.empty();
3002 }
3003 
3004 /// Initialize target region entry.
3005 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3006     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3007                                     StringRef ParentName, unsigned LineNum,
3008                                     unsigned Order) {
3009   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3010                                              "only required for the device "
3011                                              "code generation.");
3012   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3013       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3014                                    OMPTargetRegionEntryTargetRegion);
3015   ++OffloadingEntriesNum;
3016 }
3017 
3018 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3019     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3020                                   StringRef ParentName, unsigned LineNum,
3021                                   llvm::Constant *Addr, llvm::Constant *ID,
3022                                   OMPTargetRegionEntryKind Flags) {
3023   // If we are emitting code for a target, the entry is already initialized,
3024   // only has to be registered.
3025   if (CGM.getLangOpts().OpenMPIsDevice) {
3026     // This could happen if the device compilation is invoked standalone.
3027     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3028       return;
3029     auto &Entry =
3030         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3031     Entry.setAddress(Addr);
3032     Entry.setID(ID);
3033     Entry.setFlags(Flags);
3034   } else {
3035     if (Flags ==
3036             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3037         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3038                                  /*IgnoreAddressId*/ true))
3039       return;
3040     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3041            "Target region entry already registered!");
3042     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3043     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3044     ++OffloadingEntriesNum;
3045   }
3046 }
3047 
3048 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3049     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3050     bool IgnoreAddressId) const {
3051   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3052   if (PerDevice == OffloadEntriesTargetRegion.end())
3053     return false;
3054   auto PerFile = PerDevice->second.find(FileID);
3055   if (PerFile == PerDevice->second.end())
3056     return false;
3057   auto PerParentName = PerFile->second.find(ParentName);
3058   if (PerParentName == PerFile->second.end())
3059     return false;
3060   auto PerLine = PerParentName->second.find(LineNum);
3061   if (PerLine == PerParentName->second.end())
3062     return false;
3063   // Fail if this entry is already registered.
3064   if (!IgnoreAddressId &&
3065       (PerLine->second.getAddress() || PerLine->second.getID()))
3066     return false;
3067   return true;
3068 }
3069 
3070 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3071     const OffloadTargetRegionEntryInfoActTy &Action) {
3072   // Scan all target region entries and perform the provided action.
3073   for (const auto &D : OffloadEntriesTargetRegion)
3074     for (const auto &F : D.second)
3075       for (const auto &P : F.second)
3076         for (const auto &L : P.second)
3077           Action(D.first, F.first, P.first(), L.first, L.second);
3078 }
3079 
3080 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3081     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3082                                        OMPTargetGlobalVarEntryKind Flags,
3083                                        unsigned Order) {
3084   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3085                                              "only required for the device "
3086                                              "code generation.");
3087   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3088   ++OffloadingEntriesNum;
3089 }
3090 
3091 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3092     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3093                                      CharUnits VarSize,
3094                                      OMPTargetGlobalVarEntryKind Flags,
3095                                      llvm::GlobalValue::LinkageTypes Linkage) {
3096   if (CGM.getLangOpts().OpenMPIsDevice) {
3097     // This could happen if the device compilation is invoked standalone.
3098     if (!hasDeviceGlobalVarEntryInfo(VarName))
3099       return;
3100     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3101     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3102       if (Entry.getVarSize().isZero()) {
3103         Entry.setVarSize(VarSize);
3104         Entry.setLinkage(Linkage);
3105       }
3106       return;
3107     }
3108     Entry.setVarSize(VarSize);
3109     Entry.setLinkage(Linkage);
3110     Entry.setAddress(Addr);
3111   } else {
3112     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3113       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3114       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3115              "Entry not initialized!");
3116       if (Entry.getVarSize().isZero()) {
3117         Entry.setVarSize(VarSize);
3118         Entry.setLinkage(Linkage);
3119       }
3120       return;
3121     }
3122     OffloadEntriesDeviceGlobalVar.try_emplace(
3123         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3124     ++OffloadingEntriesNum;
3125   }
3126 }
3127 
3128 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3129     actOnDeviceGlobalVarEntriesInfo(
3130         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3131   // Scan all target region entries and perform the provided action.
3132   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3133     Action(E.getKey(), E.getValue());
3134 }
3135 
3136 void CGOpenMPRuntime::createOffloadEntry(
3137     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3138     llvm::GlobalValue::LinkageTypes Linkage) {
3139   StringRef Name = Addr->getName();
3140   llvm::Module &M = CGM.getModule();
3141   llvm::LLVMContext &C = M.getContext();
3142 
3143   // Create constant string with the name.
3144   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3145 
3146   std::string StringName = getName({"omp_offloading", "entry_name"});
3147   auto *Str = new llvm::GlobalVariable(
3148       M, StrPtrInit->getType(), /*isConstant=*/true,
3149       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3150   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3151 
3152   llvm::Constant *Data[] = {
3153       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3154       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3155       llvm::ConstantInt::get(CGM.SizeTy, Size),
3156       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3157       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3158   std::string EntryName = getName({"omp_offloading", "entry", ""});
3159   llvm::GlobalVariable *Entry = createGlobalStruct(
3160       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3161       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3162 
3163   // The entry has to be created in the section the linker expects it to be.
3164   Entry->setSection("omp_offloading_entries");
3165 }
3166 
3167 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3168   // Emit the offloading entries and metadata so that the device codegen side
3169   // can easily figure out what to emit. The produced metadata looks like
3170   // this:
3171   //
3172   // !omp_offload.info = !{!1, ...}
3173   //
3174   // Right now we only generate metadata for function that contain target
3175   // regions.
3176 
3177   // If we are in simd mode or there are no entries, we don't need to do
3178   // anything.
3179   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3180     return;
3181 
3182   llvm::Module &M = CGM.getModule();
3183   llvm::LLVMContext &C = M.getContext();
3184   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3185                          SourceLocation, StringRef>,
3186               16>
3187       OrderedEntries(OffloadEntriesInfoManager.size());
3188   llvm::SmallVector<StringRef, 16> ParentFunctions(
3189       OffloadEntriesInfoManager.size());
3190 
3191   // Auxiliary methods to create metadata values and strings.
3192   auto &&GetMDInt = [this](unsigned V) {
3193     return llvm::ConstantAsMetadata::get(
3194         llvm::ConstantInt::get(CGM.Int32Ty, V));
3195   };
3196 
3197   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3198 
3199   // Create the offloading info metadata node.
3200   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3201 
3202   // Create function that emits metadata for each target region entry;
3203   auto &&TargetRegionMetadataEmitter =
3204       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3205        &GetMDString](
3206           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3207           unsigned Line,
3208           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3209         // Generate metadata for target regions. Each entry of this metadata
3210         // contains:
3211         // - Entry 0 -> Kind of this type of metadata (0).
3212         // - Entry 1 -> Device ID of the file where the entry was identified.
3213         // - Entry 2 -> File ID of the file where the entry was identified.
3214         // - Entry 3 -> Mangled name of the function where the entry was
3215         // identified.
3216         // - Entry 4 -> Line in the file where the entry was identified.
3217         // - Entry 5 -> Order the entry was created.
3218         // The first element of the metadata node is the kind.
3219         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3220                                  GetMDInt(FileID),      GetMDString(ParentName),
3221                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3222 
3223         SourceLocation Loc;
3224         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3225                   E = CGM.getContext().getSourceManager().fileinfo_end();
3226              I != E; ++I) {
3227           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3228               I->getFirst()->getUniqueID().getFile() == FileID) {
3229             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3230                 I->getFirst(), Line, 1);
3231             break;
3232           }
3233         }
3234         // Save this entry in the right position of the ordered entries array.
3235         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3236         ParentFunctions[E.getOrder()] = ParentName;
3237 
3238         // Add metadata to the named metadata node.
3239         MD->addOperand(llvm::MDNode::get(C, Ops));
3240       };
3241 
3242   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3243       TargetRegionMetadataEmitter);
3244 
3245   // Create function that emits metadata for each device global variable entry;
3246   auto &&DeviceGlobalVarMetadataEmitter =
3247       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3248        MD](StringRef MangledName,
3249            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3250                &E) {
3251         // Generate metadata for global variables. Each entry of this metadata
3252         // contains:
3253         // - Entry 0 -> Kind of this type of metadata (1).
3254         // - Entry 1 -> Mangled name of the variable.
3255         // - Entry 2 -> Declare target kind.
3256         // - Entry 3 -> Order the entry was created.
3257         // The first element of the metadata node is the kind.
3258         llvm::Metadata *Ops[] = {
3259             GetMDInt(E.getKind()), GetMDString(MangledName),
3260             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3261 
3262         // Save this entry in the right position of the ordered entries array.
3263         OrderedEntries[E.getOrder()] =
3264             std::make_tuple(&E, SourceLocation(), MangledName);
3265 
3266         // Add metadata to the named metadata node.
3267         MD->addOperand(llvm::MDNode::get(C, Ops));
3268       };
3269 
3270   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3271       DeviceGlobalVarMetadataEmitter);
3272 
3273   for (const auto &E : OrderedEntries) {
3274     assert(std::get<0>(E) && "All ordered entries must exist!");
3275     if (const auto *CE =
3276             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3277                 std::get<0>(E))) {
3278       if (!CE->getID() || !CE->getAddress()) {
3279         // Do not blame the entry if the parent funtion is not emitted.
3280         StringRef FnName = ParentFunctions[CE->getOrder()];
3281         if (!CGM.GetGlobalValue(FnName))
3282           continue;
3283         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3284             DiagnosticsEngine::Error,
3285             "Offloading entry for target region in %0 is incorrect: either the "
3286             "address or the ID is invalid.");
3287         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3288         continue;
3289       }
3290       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3291                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3292     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3293                                              OffloadEntryInfoDeviceGlobalVar>(
3294                    std::get<0>(E))) {
3295       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3296           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3297               CE->getFlags());
3298       switch (Flags) {
3299       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3300         if (CGM.getLangOpts().OpenMPIsDevice &&
3301             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3302           continue;
3303         if (!CE->getAddress()) {
3304           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3305               DiagnosticsEngine::Error, "Offloading entry for declare target "
3306                                         "variable %0 is incorrect: the "
3307                                         "address is invalid.");
3308           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3309           continue;
3310         }
3311         // The vaiable has no definition - no need to add the entry.
3312         if (CE->getVarSize().isZero())
3313           continue;
3314         break;
3315       }
3316       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3317         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3318                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3319                "Declaret target link address is set.");
3320         if (CGM.getLangOpts().OpenMPIsDevice)
3321           continue;
3322         if (!CE->getAddress()) {
3323           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3324               DiagnosticsEngine::Error,
3325               "Offloading entry for declare target variable is incorrect: the "
3326               "address is invalid.");
3327           CGM.getDiags().Report(DiagID);
3328           continue;
3329         }
3330         break;
3331       }
3332       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3333                          CE->getVarSize().getQuantity(), Flags,
3334                          CE->getLinkage());
3335     } else {
3336       llvm_unreachable("Unsupported entry kind.");
3337     }
3338   }
3339 }
3340 
3341 /// Loads all the offload entries information from the host IR
3342 /// metadata.
3343 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3344   // If we are in target mode, load the metadata from the host IR. This code has
3345   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3346 
3347   if (!CGM.getLangOpts().OpenMPIsDevice)
3348     return;
3349 
3350   if (CGM.getLangOpts().OMPHostIRFile.empty())
3351     return;
3352 
3353   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3354   if (auto EC = Buf.getError()) {
3355     CGM.getDiags().Report(diag::err_cannot_open_file)
3356         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3357     return;
3358   }
3359 
3360   llvm::LLVMContext C;
3361   auto ME = expectedToErrorOrAndEmitErrors(
3362       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3363 
3364   if (auto EC = ME.getError()) {
3365     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3366         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3367     CGM.getDiags().Report(DiagID)
3368         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3369     return;
3370   }
3371 
3372   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3373   if (!MD)
3374     return;
3375 
3376   for (llvm::MDNode *MN : MD->operands()) {
3377     auto &&GetMDInt = [MN](unsigned Idx) {
3378       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3379       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3380     };
3381 
3382     auto &&GetMDString = [MN](unsigned Idx) {
3383       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3384       return V->getString();
3385     };
3386 
3387     switch (GetMDInt(0)) {
3388     default:
3389       llvm_unreachable("Unexpected metadata!");
3390       break;
3391     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3392         OffloadingEntryInfoTargetRegion:
3393       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3394           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3395           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3396           /*Order=*/GetMDInt(5));
3397       break;
3398     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3399         OffloadingEntryInfoDeviceGlobalVar:
3400       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3401           /*MangledName=*/GetMDString(1),
3402           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3403               /*Flags=*/GetMDInt(2)),
3404           /*Order=*/GetMDInt(3));
3405       break;
3406     }
3407   }
3408 }
3409 
3410 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3411   if (!KmpRoutineEntryPtrTy) {
3412     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3413     ASTContext &C = CGM.getContext();
3414     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3415     FunctionProtoType::ExtProtoInfo EPI;
3416     KmpRoutineEntryPtrQTy = C.getPointerType(
3417         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3418     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3419   }
3420 }
3421 
3422 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3423   // Make sure the type of the entry is already created. This is the type we
3424   // have to create:
3425   // struct __tgt_offload_entry{
3426   //   void      *addr;       // Pointer to the offload entry info.
3427   //                          // (function or global)
3428   //   char      *name;       // Name of the function or global.
3429   //   size_t     size;       // Size of the entry info (0 if it a function).
3430   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3431   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3432   // };
3433   if (TgtOffloadEntryQTy.isNull()) {
3434     ASTContext &C = CGM.getContext();
3435     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3436     RD->startDefinition();
3437     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3438     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3439     addFieldToRecordDecl(C, RD, C.getSizeType());
3440     addFieldToRecordDecl(
3441         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3442     addFieldToRecordDecl(
3443         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3444     RD->completeDefinition();
3445     RD->addAttr(PackedAttr::CreateImplicit(C));
3446     TgtOffloadEntryQTy = C.getRecordType(RD);
3447   }
3448   return TgtOffloadEntryQTy;
3449 }
3450 
3451 namespace {
3452 struct PrivateHelpersTy {
3453   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3454                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3455       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3456         PrivateElemInit(PrivateElemInit) {}
3457   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3458   const Expr *OriginalRef = nullptr;
3459   const VarDecl *Original = nullptr;
3460   const VarDecl *PrivateCopy = nullptr;
3461   const VarDecl *PrivateElemInit = nullptr;
3462   bool isLocalPrivate() const {
3463     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3464   }
3465 };
3466 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3467 } // anonymous namespace
3468 
3469 static bool isAllocatableDecl(const VarDecl *VD) {
3470   const VarDecl *CVD = VD->getCanonicalDecl();
3471   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3472     return false;
3473   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3474   // Use the default allocation.
3475   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3476             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3477            !AA->getAllocator());
3478 }
3479 
3480 static RecordDecl *
3481 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3482   if (!Privates.empty()) {
3483     ASTContext &C = CGM.getContext();
3484     // Build struct .kmp_privates_t. {
3485     //         /*  private vars  */
3486     //       };
3487     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3488     RD->startDefinition();
3489     for (const auto &Pair : Privates) {
3490       const VarDecl *VD = Pair.second.Original;
3491       QualType Type = VD->getType().getNonReferenceType();
3492       // If the private variable is a local variable with lvalue ref type,
3493       // allocate the pointer instead of the pointee type.
3494       if (Pair.second.isLocalPrivate()) {
3495         if (VD->getType()->isLValueReferenceType())
3496           Type = C.getPointerType(Type);
3497         if (isAllocatableDecl(VD))
3498           Type = C.getPointerType(Type);
3499       }
3500       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3501       if (VD->hasAttrs()) {
3502         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3503              E(VD->getAttrs().end());
3504              I != E; ++I)
3505           FD->addAttr(*I);
3506       }
3507     }
3508     RD->completeDefinition();
3509     return RD;
3510   }
3511   return nullptr;
3512 }
3513 
3514 static RecordDecl *
3515 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3516                          QualType KmpInt32Ty,
3517                          QualType KmpRoutineEntryPointerQTy) {
3518   ASTContext &C = CGM.getContext();
3519   // Build struct kmp_task_t {
3520   //         void *              shareds;
3521   //         kmp_routine_entry_t routine;
3522   //         kmp_int32           part_id;
3523   //         kmp_cmplrdata_t data1;
3524   //         kmp_cmplrdata_t data2;
3525   // For taskloops additional fields:
3526   //         kmp_uint64          lb;
3527   //         kmp_uint64          ub;
3528   //         kmp_int64           st;
3529   //         kmp_int32           liter;
3530   //         void *              reductions;
3531   //       };
3532   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3533   UD->startDefinition();
3534   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3535   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3536   UD->completeDefinition();
3537   QualType KmpCmplrdataTy = C.getRecordType(UD);
3538   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3539   RD->startDefinition();
3540   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3541   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3542   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3543   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3544   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3545   if (isOpenMPTaskLoopDirective(Kind)) {
3546     QualType KmpUInt64Ty =
3547         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3548     QualType KmpInt64Ty =
3549         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3550     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3551     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3552     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3553     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3554     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3555   }
3556   RD->completeDefinition();
3557   return RD;
3558 }
3559 
3560 static RecordDecl *
3561 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3562                                      ArrayRef<PrivateDataTy> Privates) {
3563   ASTContext &C = CGM.getContext();
3564   // Build struct kmp_task_t_with_privates {
3565   //         kmp_task_t task_data;
3566   //         .kmp_privates_t. privates;
3567   //       };
3568   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3569   RD->startDefinition();
3570   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3571   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3572     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3573   RD->completeDefinition();
3574   return RD;
3575 }
3576 
3577 /// Emit a proxy function which accepts kmp_task_t as the second
3578 /// argument.
3579 /// \code
3580 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3581 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3582 ///   For taskloops:
3583 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3584 ///   tt->reductions, tt->shareds);
3585 ///   return 0;
3586 /// }
3587 /// \endcode
3588 static llvm::Function *
3589 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3590                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3591                       QualType KmpTaskTWithPrivatesPtrQTy,
3592                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3593                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3594                       llvm::Value *TaskPrivatesMap) {
3595   ASTContext &C = CGM.getContext();
3596   FunctionArgList Args;
3597   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3598                             ImplicitParamDecl::Other);
3599   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3600                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3601                                 ImplicitParamDecl::Other);
3602   Args.push_back(&GtidArg);
3603   Args.push_back(&TaskTypeArg);
3604   const auto &TaskEntryFnInfo =
3605       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3606   llvm::FunctionType *TaskEntryTy =
3607       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3608   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3609   auto *TaskEntry = llvm::Function::Create(
3610       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3611   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3612   TaskEntry->setDoesNotRecurse();
3613   CodeGenFunction CGF(CGM);
3614   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3615                     Loc, Loc);
3616 
3617   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3618   // tt,
3619   // For taskloops:
3620   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3621   // tt->task_data.shareds);
3622   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3623       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3624   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3625       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3626       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3627   const auto *KmpTaskTWithPrivatesQTyRD =
3628       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3629   LValue Base =
3630       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3631   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3632   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3633   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3634   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3635 
3636   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3637   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3638   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3639       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3640       CGF.ConvertTypeForMem(SharedsPtrTy));
3641 
3642   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3643   llvm::Value *PrivatesParam;
3644   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3645     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3646     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3647         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3648   } else {
3649     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3650   }
3651 
3652   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3653                                TaskPrivatesMap,
3654                                CGF.Builder
3655                                    .CreatePointerBitCastOrAddrSpaceCast(
3656                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3657                                    .getPointer()};
3658   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3659                                           std::end(CommonArgs));
3660   if (isOpenMPTaskLoopDirective(Kind)) {
3661     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3662     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3663     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3664     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3665     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3666     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3667     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3668     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3669     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3670     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3671     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3672     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3673     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3674     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3675     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3676     CallArgs.push_back(LBParam);
3677     CallArgs.push_back(UBParam);
3678     CallArgs.push_back(StParam);
3679     CallArgs.push_back(LIParam);
3680     CallArgs.push_back(RParam);
3681   }
3682   CallArgs.push_back(SharedsParam);
3683 
3684   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3685                                                   CallArgs);
3686   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3687                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3688   CGF.FinishFunction();
3689   return TaskEntry;
3690 }
3691 
3692 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3693                                             SourceLocation Loc,
3694                                             QualType KmpInt32Ty,
3695                                             QualType KmpTaskTWithPrivatesPtrQTy,
3696                                             QualType KmpTaskTWithPrivatesQTy) {
3697   ASTContext &C = CGM.getContext();
3698   FunctionArgList Args;
3699   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3700                             ImplicitParamDecl::Other);
3701   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3702                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3703                                 ImplicitParamDecl::Other);
3704   Args.push_back(&GtidArg);
3705   Args.push_back(&TaskTypeArg);
3706   const auto &DestructorFnInfo =
3707       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3708   llvm::FunctionType *DestructorFnTy =
3709       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3710   std::string Name =
3711       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3712   auto *DestructorFn =
3713       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3714                              Name, &CGM.getModule());
3715   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3716                                     DestructorFnInfo);
3717   DestructorFn->setDoesNotRecurse();
3718   CodeGenFunction CGF(CGM);
3719   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3720                     Args, Loc, Loc);
3721 
3722   LValue Base = CGF.EmitLoadOfPointerLValue(
3723       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3724       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3725   const auto *KmpTaskTWithPrivatesQTyRD =
3726       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3727   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3728   Base = CGF.EmitLValueForField(Base, *FI);
3729   for (const auto *Field :
3730        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3731     if (QualType::DestructionKind DtorKind =
3732             Field->getType().isDestructedType()) {
3733       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3734       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3735     }
3736   }
3737   CGF.FinishFunction();
3738   return DestructorFn;
3739 }
3740 
3741 /// Emit a privates mapping function for correct handling of private and
3742 /// firstprivate variables.
3743 /// \code
3744 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3745 /// **noalias priv1,...,  <tyn> **noalias privn) {
3746 ///   *priv1 = &.privates.priv1;
3747 ///   ...;
3748 ///   *privn = &.privates.privn;
3749 /// }
3750 /// \endcode
3751 static llvm::Value *
3752 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3753                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3754                                ArrayRef<PrivateDataTy> Privates) {
3755   ASTContext &C = CGM.getContext();
3756   FunctionArgList Args;
3757   ImplicitParamDecl TaskPrivatesArg(
3758       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3759       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3760       ImplicitParamDecl::Other);
3761   Args.push_back(&TaskPrivatesArg);
3762   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3763   unsigned Counter = 1;
3764   for (const Expr *E : Data.PrivateVars) {
3765     Args.push_back(ImplicitParamDecl::Create(
3766         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3767         C.getPointerType(C.getPointerType(E->getType()))
3768             .withConst()
3769             .withRestrict(),
3770         ImplicitParamDecl::Other));
3771     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3772     PrivateVarsPos[VD] = Counter;
3773     ++Counter;
3774   }
3775   for (const Expr *E : Data.FirstprivateVars) {
3776     Args.push_back(ImplicitParamDecl::Create(
3777         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3778         C.getPointerType(C.getPointerType(E->getType()))
3779             .withConst()
3780             .withRestrict(),
3781         ImplicitParamDecl::Other));
3782     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3783     PrivateVarsPos[VD] = Counter;
3784     ++Counter;
3785   }
3786   for (const Expr *E : Data.LastprivateVars) {
3787     Args.push_back(ImplicitParamDecl::Create(
3788         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3789         C.getPointerType(C.getPointerType(E->getType()))
3790             .withConst()
3791             .withRestrict(),
3792         ImplicitParamDecl::Other));
3793     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3794     PrivateVarsPos[VD] = Counter;
3795     ++Counter;
3796   }
3797   for (const VarDecl *VD : Data.PrivateLocals) {
3798     QualType Ty = VD->getType().getNonReferenceType();
3799     if (VD->getType()->isLValueReferenceType())
3800       Ty = C.getPointerType(Ty);
3801     if (isAllocatableDecl(VD))
3802       Ty = C.getPointerType(Ty);
3803     Args.push_back(ImplicitParamDecl::Create(
3804         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3805         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3806         ImplicitParamDecl::Other));
3807     PrivateVarsPos[VD] = Counter;
3808     ++Counter;
3809   }
3810   const auto &TaskPrivatesMapFnInfo =
3811       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3812   llvm::FunctionType *TaskPrivatesMapTy =
3813       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3814   std::string Name =
3815       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3816   auto *TaskPrivatesMap = llvm::Function::Create(
3817       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3818       &CGM.getModule());
3819   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3820                                     TaskPrivatesMapFnInfo);
3821   if (CGM.getLangOpts().Optimize) {
3822     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3823     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3824     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3825   }
3826   CodeGenFunction CGF(CGM);
3827   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3828                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3829 
3830   // *privi = &.privates.privi;
3831   LValue Base = CGF.EmitLoadOfPointerLValue(
3832       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3833       TaskPrivatesArg.getType()->castAs<PointerType>());
3834   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3835   Counter = 0;
3836   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3837     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3838     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3839     LValue RefLVal =
3840         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3841     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3842         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3843     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3844     ++Counter;
3845   }
3846   CGF.FinishFunction();
3847   return TaskPrivatesMap;
3848 }
3849 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lock-step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and, for every entry that is
/// not a local private, emits the initializer of its private copy into the
/// matching field reached through \p TDBase.
///
/// \param KmpTaskSharedsPtr Address of the shareds block. It is only read when
/// \p ForDup is set and firstprivates are present (non-target directives), or
/// when it is valid (target directives).
/// \param ForDup When true, only initializers that are non-trivial
/// CXXConstructExprs are emitted, and the source of a firstprivate copy is
/// read from the shareds block rather than emitted from the original
/// reference (artificial target data variables excepted).
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement of the taskloop or task region; its capture
  // info is used below to find the shareds field of an original variable.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  // SrcBase (the shareds block viewed as SharedsTy) is only set up when it
  // may be needed as the source of firstprivate copies.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself,
  // advancing once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // With ForDup set, only non-trivial constructor initializers are emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // When an element-init helper variable exists, it is privatized below
      // to the address of the original value while Init is emitted.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original from its field in the shareds block, using the
          // declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // The original is a lambda capture field, or the current code
          // declaration is a block: emit the original reference as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Elem stands for the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: Elem stands for the original value while the
          // initializer is emitted into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper variable: emit the initializer as is.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3971 
3972 /// Check if duplication function is required for taskloops.
3973 static bool checkInitIsRequired(CodeGenFunction &CGF,
3974                                 ArrayRef<PrivateDataTy> Privates) {
3975   bool InitRequired = false;
3976   for (const PrivateDataTy &Pair : Privates) {
3977     if (Pair.second.isLocalPrivate())
3978       continue;
3979     const VarDecl *VD = Pair.second.PrivateCopy;
3980     const Expr *Init = VD->getAnyInitializer();
3981     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3982                                     !CGF.isTrivialInitializer(Init));
3983     if (InitRequired)
3984       break;
3985   }
3986   return InitRequired;
3987 }
3988 
3989 
3990 /// Emit task_dup function (for initialization of
3991 /// private/firstprivate/lastprivate vars and last_iter flag)
3992 /// \code
3993 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3994 /// lastpriv) {
3995 /// // setup lastprivate flag
3996 ///    task_dst->last = lastpriv;
3997 /// // could be constructor calls here...
3998 /// }
3999 /// \endcode
4000 static llvm::Value *
4001 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4002                     const OMPExecutableDirective &D,
4003                     QualType KmpTaskTWithPrivatesPtrQTy,
4004                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4005                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4006                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4007                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4008   ASTContext &C = CGM.getContext();
4009   FunctionArgList Args;
4010   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4011                            KmpTaskTWithPrivatesPtrQTy,
4012                            ImplicitParamDecl::Other);
4013   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4014                            KmpTaskTWithPrivatesPtrQTy,
4015                            ImplicitParamDecl::Other);
4016   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4017                                 ImplicitParamDecl::Other);
4018   Args.push_back(&DstArg);
4019   Args.push_back(&SrcArg);
4020   Args.push_back(&LastprivArg);
4021   const auto &TaskDupFnInfo =
4022       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4023   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4024   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4025   auto *TaskDup = llvm::Function::Create(
4026       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4027   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4028   TaskDup->setDoesNotRecurse();
4029   CodeGenFunction CGF(CGM);
4030   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4031                     Loc);
4032 
4033   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4034       CGF.GetAddrOfLocalVar(&DstArg),
4035       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4036   // task_dst->liter = lastpriv;
4037   if (WithLastIter) {
4038     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4039     LValue Base = CGF.EmitLValueForField(
4040         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4041     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4042     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4043         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4044     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4045   }
4046 
4047   // Emit initial values for private copies (if any).
4048   assert(!Privates.empty());
4049   Address KmpTaskSharedsPtr = Address::invalid();
4050   if (!Data.FirstprivateVars.empty()) {
4051     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4052         CGF.GetAddrOfLocalVar(&SrcArg),
4053         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4054     LValue Base = CGF.EmitLValueForField(
4055         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4056     KmpTaskSharedsPtr = Address(
4057         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4058                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4059                                                   KmpTaskTShareds)),
4060                              Loc),
4061         CGM.getNaturalTypeAlignment(SharedsTy));
4062   }
4063   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4064                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4065   CGF.FinishFunction();
4066   return TaskDup;
4067 }
4068 
4069 /// Checks if destructor function is required to be generated.
4070 /// \return true if cleanups are required, false otherwise.
4071 static bool
4072 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4073                          ArrayRef<PrivateDataTy> Privates) {
4074   for (const PrivateDataTy &P : Privates) {
4075     if (P.second.isLocalPrivate())
4076       continue;
4077     QualType Ty = P.second.Original->getType().getNonReferenceType();
4078     if (Ty.isDestructedType())
4079       return true;
4080   }
4081   return false;
4082 }
4083 
4084 namespace {
4085 /// Loop generator for OpenMP iterator expression.
4086 class OMPIteratorGeneratorScope final
4087     : public CodeGenFunction::OMPPrivateScope {
4088   CodeGenFunction &CGF;
4089   const OMPIteratorExpr *E = nullptr;
4090   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4091   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4092   OMPIteratorGeneratorScope() = delete;
4093   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4094 
4095 public:
4096   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4097       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4098     if (!E)
4099       return;
4100     SmallVector<llvm::Value *, 4> Uppers;
4101     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4102       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4103       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4104       addPrivate(VD, [&CGF, VD]() {
4105         return CGF.CreateMemTemp(VD->getType(), VD->getName());
4106       });
4107       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4108       addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4109         return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4110                                  "counter.addr");
4111       });
4112     }
4113     Privatize();
4114 
4115     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4116       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4117       LValue CLVal =
4118           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4119                              HelperData.CounterVD->getType());
4120       // Counter = 0;
4121       CGF.EmitStoreOfScalar(
4122           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4123           CLVal);
4124       CodeGenFunction::JumpDest &ContDest =
4125           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4126       CodeGenFunction::JumpDest &ExitDest =
4127           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4128       // N = <number-of_iterations>;
4129       llvm::Value *N = Uppers[I];
4130       // cont:
4131       // if (Counter < N) goto body; else goto exit;
4132       CGF.EmitBlock(ContDest.getBlock());
4133       auto *CVal =
4134           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4135       llvm::Value *Cmp =
4136           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4137               ? CGF.Builder.CreateICmpSLT(CVal, N)
4138               : CGF.Builder.CreateICmpULT(CVal, N);
4139       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4140       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4141       // body:
4142       CGF.EmitBlock(BodyBB);
4143       // Iteri = Begini + Counter * Stepi;
4144       CGF.EmitIgnoredExpr(HelperData.Update);
4145     }
4146   }
4147   ~OMPIteratorGeneratorScope() {
4148     if (!E)
4149       return;
4150     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4151       // Counter = Counter + 1;
4152       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4153       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4154       // goto cont;
4155       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4156       // exit:
4157       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4158     }
4159   }
4160 };
4161 } // namespace
4162 
4163 static std::pair<llvm::Value *, llvm::Value *>
4164 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4165   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4166   llvm::Value *Addr;
4167   if (OASE) {
4168     const Expr *Base = OASE->getBase();
4169     Addr = CGF.EmitScalarExpr(Base);
4170   } else {
4171     Addr = CGF.EmitLValue(E).getPointer(CGF);
4172   }
4173   llvm::Value *SizeVal;
4174   QualType Ty = E->getType();
4175   if (OASE) {
4176     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4177     for (const Expr *SE : OASE->getDimensions()) {
4178       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4179       Sz = CGF.EmitScalarConversion(
4180           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4181       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4182     }
4183   } else if (const auto *ASE =
4184                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4185     LValue UpAddrLVal =
4186         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4187     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4188     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4189         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4190     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4191     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4192     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4193   } else {
4194     SizeVal = CGF.getTypeSize(Ty);
4195   }
4196   return std::make_pair(Addr, SizeVal);
4197 }
4198 
4199 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4200 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4201   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4202   if (KmpTaskAffinityInfoTy.isNull()) {
4203     RecordDecl *KmpAffinityInfoRD =
4204         C.buildImplicitRecord("kmp_task_affinity_info_t");
4205     KmpAffinityInfoRD->startDefinition();
4206     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4207     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4208     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4209     KmpAffinityInfoRD->completeDefinition();
4210     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4211   }
4212 }
4213 
/// Emits the code that allocates and fills in the runtime task descriptor for
/// the task-generating directive \p D.
///
/// In order, the function:
///  - collects the private, firstprivate, lastprivate and local private
///    variables from \p Data and stable-sorts them by decreasing alignment;
///  - builds (or reuses the cached) kmp_task_t type and the per-task record
///    that wraps it together with the privates;
///  - emits the privates mapping function (when there are privates) and the
///    proxy task entry function;
///  - calls __kmpc_omp_task_alloc, or __kmpc_omp_target_task_alloc when \p D
///    has a nowait clause;
///  - handles the detach and affinity clauses;
///  - copies the shareds, initializes the privates and, where required, emits
///    the task duplication function and stores the destructors function and
///    the priority into the task descriptor.
///
/// \param CGF Function into which the code is emitted.
/// \param Loc Location used for the emitted runtime calls.
/// \param D Directive being lowered.
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates mapping function pointer.
/// \param SharedsTy Structure type of the shareds.
/// \param Shareds Address the shareds are copied from.
/// \param Data Task-related data (privates, tied/final/priority info, ...).
/// \return The allocated task, the proxy entry function, the typed task
/// pointer, the lvalue of the embedded kmp_task_t, its record declaration and,
/// when one was generated, the task duplication function.
4214 CGOpenMPRuntime::TaskResultTy
4215 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4216                               const OMPExecutableDirective &D,
4217                               llvm::Function *TaskFunction, QualType SharedsTy,
4218                               Address Shareds, const OMPTaskDataTy &Data) {
4219   ASTContext &C = CGM.getContext();
4220   llvm::SmallVector<PrivateDataTy, 4> Privates;
4221   // Aggregate privates and sort them by the alignment.
4222   const auto *I = Data.PrivateCopies.begin();
4223   for (const Expr *E : Data.PrivateVars) {
4224     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4225     Privates.emplace_back(
4226         C.getDeclAlign(VD),
4227         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4228                          /*PrivateElemInit=*/nullptr));
4229     ++I;
4230   }
4231   I = Data.FirstprivateCopies.begin();
4232   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4233   for (const Expr *E : Data.FirstprivateVars) {
4234     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4235     Privates.emplace_back(
4236         C.getDeclAlign(VD),
4237         PrivateHelpersTy(
4238             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4239             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4240     ++I;
4241     ++IElemInitRef;
4242   }
4243   I = Data.LastprivateCopies.begin();
4244   for (const Expr *E : Data.LastprivateVars) {
4245     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4246     Privates.emplace_back(
4247         C.getDeclAlign(VD),
4248         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4249                          /*PrivateElemInit=*/nullptr));
4250     ++I;
4251   }
       // Local privates: allocatable declarations are recorded with pointer
       // alignment, all others with the alignment of the declaration.
4252   for (const VarDecl *VD : Data.PrivateLocals) {
4253     if (isAllocatableDecl(VD))
4254       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4255     else
4256       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4257   }
       // Largest alignment first; the stable sort keeps the collection order of
       // equally aligned entries.
4258   llvm::stable_sort(Privates,
4259                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4260                       return L.first > R.first;
4261                     });
4262   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4263   // Build type kmp_routine_entry_t (if not built yet).
4264   emitKmpRoutineEntryT(KmpInt32Ty);
4265   // Build type kmp_task_t (if not built yet).
       // Taskloop directives and the remaining directives use separately cached
       // record types.
4266   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4267     if (SavedKmpTaskloopTQTy.isNull()) {
4268       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4269           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4270     }
4271     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4272   } else {
4273     assert((D.getDirectiveKind() == OMPD_task ||
4274             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4275             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4276            "Expected taskloop, task or target directive");
4277     if (SavedKmpTaskTQTy.isNull()) {
4278       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4279           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4280     }
4281     KmpTaskTQTy = SavedKmpTaskTQTy;
4282   }
4283   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4284   // Build particular struct kmp_task_t for the given task.
4285   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4286       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4287   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4288   QualType KmpTaskTWithPrivatesPtrQTy =
4289       C.getPointerType(KmpTaskTWithPrivatesQTy);
4290   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4291   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4292       KmpTaskTWithPrivatesTy->getPointerTo();
4293   llvm::Value *KmpTaskTWithPrivatesTySize =
4294       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4295   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4296 
4297   // Emit initial values for private copies (if any).
4298   llvm::Value *TaskPrivatesMap = nullptr;
       // The privates map has the type of the fourth parameter of the outlined
       // task function; a null pointer of that type is passed when there are no
       // privates.
4299   llvm::Type *TaskPrivatesMapTy =
4300       std::next(TaskFunction->arg_begin(), 3)->getType();
4301   if (!Privates.empty()) {
4302     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4303     TaskPrivatesMap =
4304         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4305     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4306         TaskPrivatesMap, TaskPrivatesMapTy);
4307   } else {
4308     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4309         cast<llvm::PointerType>(TaskPrivatesMapTy));
4310   }
4311   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4312   // kmp_task_t *tt);
4313   llvm::Function *TaskEntry = emitProxyTaskFunction(
4314       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4315       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4316       TaskPrivatesMap);
4317 
4318   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4319   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4320   // kmp_routine_entry_t *task_entry);
4321   // Task flags. Format is taken from
4322   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4323   // description of kmp_tasking_flags struct.
4324   enum {
4325     TiedFlag = 0x1,
4326     FinalFlag = 0x2,
4327     DestructorsFlag = 0x8,
4328     PriorityFlag = 0x20,
4329     DetachableFlag = 0x40,
4330   };
       // Compile-time part of the flags; the 'final' bit is merged in below
       // because it may only be known at run time.
4331   unsigned Flags = Data.Tied ? TiedFlag : 0;
4332   bool NeedsCleanup = false;
4333   if (!Privates.empty()) {
4334     NeedsCleanup =
4335         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4336     if (NeedsCleanup)
4337       Flags = Flags | DestructorsFlag;
4338   }
4339   if (Data.Priority.getInt())
4340     Flags = Flags | PriorityFlag;
4341   if (D.hasClausesOfKind<OMPDetachClause>())
4342     Flags = Flags | DetachableFlag;
       // Data.Final carries either a run-time condition (pointer part, lowered to
       // a select) or a compile-time value (int part).
4343   llvm::Value *TaskFlags =
4344       Data.Final.getPointer()
4345           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4346                                      CGF.Builder.getInt32(FinalFlag),
4347                                      CGF.Builder.getInt32(/*C=*/0))
4348           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4349   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4350   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4351   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4352       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4353       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4354           TaskEntry, KmpRoutineEntryPtrTy)};
4355   llvm::Value *NewTask;
       // With a nowait clause the target task allocation entry point is used; it
       // receives the device id as an additional trailing argument.
4356   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4357     // Check if we have any device clause associated with the directive.
4358     const Expr *Device = nullptr;
4359     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4360       Device = C->getDevice();
4361     // Emit device ID if any otherwise use default value.
4362     llvm::Value *DeviceID;
4363     if (Device)
4364       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4365                                            CGF.Int64Ty, /*isSigned=*/true);
4366     else
4367       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4368     AllocArgs.push_back(DeviceID);
4369     NewTask = CGF.EmitRuntimeCall(
4370         OMPBuilder.getOrCreateRuntimeFunction(
4371             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4372         AllocArgs);
4373   } else {
4374     NewTask =
4375         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4376                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4377                             AllocArgs);
4378   }
4379   // Emit detach clause initialization.
4380   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4381   // task_descriptor);
4382   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4383     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4384     LValue EvtLVal = CGF.EmitLValue(Evt);
4385 
4386     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4387     // int gtid, kmp_task_t *task);
         // Note: this Loc is the emitted location value for the clause and
         // shadows the SourceLocation parameter inside this block.
4388     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4389     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4390     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4391     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4392         OMPBuilder.getOrCreateRuntimeFunction(
4393             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4394         {Loc, Tid, NewTask});
4395     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4396                                       Evt->getExprLoc());
4397     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4398   }
4399   // Process affinity clauses.
4400   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4401     // Process list of affinity data.
4402     ASTContext &C = CGM.getContext();
4403     Address AffinitiesArray = Address::invalid();
4404     // Calculate number of elements to form the array of affinity data.
         // Clauses with an iterator modifier contribute a run-time value
         // (NumOfElements); the remaining clauses are counted statically
         // (NumAffinities).
         // NOTE(review): the iterator counts of all iterator-modified clauses are
         // multiplied into a single value and the clauses' list sizes are not
         // factored in, while the fill loop below stores one entry per list item
         // per iteration - confirm the array is always large enough.
4405     llvm::Value *NumOfElements = nullptr;
4406     unsigned NumAffinities = 0;
4407     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4408       if (const Expr *Modifier = C->getModifier()) {
4409         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4410         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4411           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4412           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4413           NumOfElements =
4414               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4415         }
4416       } else {
4417         NumAffinities += C->varlist_size();
4418       }
4419     }
4420     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4421     // Fields ids in kmp_task_affinity_info record.
4422     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4423 
         // A run-time element count requires a variable-length array; otherwise a
         // constant-sized temporary is used.
4424     QualType KmpTaskAffinityInfoArrayTy;
4425     if (NumOfElements) {
4426       NumOfElements = CGF.Builder.CreateNUWAdd(
4427           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4428       auto *OVE = new (C) OpaqueValueExpr(
4429           Loc,
4430           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4431           VK_PRValue);
4432       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4433                                                     RValue::get(NumOfElements));
4434       KmpTaskAffinityInfoArrayTy =
4435           C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4436                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4437       // Properly emit variable-sized array.
4438       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4439                                            ImplicitParamDecl::Other);
4440       CGF.EmitVarDecl(*PD);
4441       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4442       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4443                                                 /*isSigned=*/false);
4444     } else {
4445       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4446           KmpTaskAffinityInfoTy,
4447           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4448           ArrayType::Normal, /*IndexTypeQuals=*/0);
4449       AffinitiesArray =
4450           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4451       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4452       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4453                                              /*isSigned=*/false);
4454     }
4455 
4456     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4457     // Fill array by elements without iterators.
         // Only the base_addr and len fields are stored for each entry; the Flags
         // field is not written in this function.
4458     unsigned Pos = 0;
4459     bool HasIterator = false;
4460     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4461       if (C->getModifier()) {
4462         HasIterator = true;
4463         continue;
4464       }
4465       for (const Expr *E : C->varlists()) {
4466         llvm::Value *Addr;
4467         llvm::Value *Size;
4468         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4469         LValue Base =
4470             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4471                                KmpTaskAffinityInfoTy);
4472         // affs[i].base_addr = &<Affinities[i].second>;
4473         LValue BaseAddrLVal = CGF.EmitLValueForField(
4474             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4475         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4476                               BaseAddrLVal);
4477         // affs[i].len = sizeof(<Affinities[i].second>);
4478         LValue LenLVal = CGF.EmitLValueForField(
4479             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4480         CGF.EmitStoreOfScalar(Size, LenLVal);
4481         ++Pos;
4482       }
4483     }
         // Entries produced inside iterator loops are indexed through a run-time
         // counter that starts right after the statically placed entries.
4484     LValue PosLVal;
4485     if (HasIterator) {
4486       PosLVal = CGF.MakeAddrLValue(
4487           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4488           C.getSizeType());
4489       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4490     }
4491     // Process elements with iterators.
4492     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4493       const Expr *Modifier = C->getModifier();
4494       if (!Modifier)
4495         continue;
           // The scope object wraps the stores emitted below into the iterator
           // loop nest; the loops are closed when it goes out of scope.
4496       OMPIteratorGeneratorScope IteratorScope(
4497           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4498       for (const Expr *E : C->varlists()) {
4499         llvm::Value *Addr;
4500         llvm::Value *Size;
4501         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4502         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4503         LValue Base = CGF.MakeAddrLValue(
4504             Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
4505                                           AffinitiesArray.getPointer(), Idx),
4506                     AffinitiesArray.getAlignment()),
4507             KmpTaskAffinityInfoTy);
4508         // affs[i].base_addr = &<Affinities[i].second>;
4509         LValue BaseAddrLVal = CGF.EmitLValueForField(
4510             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4511         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4512                               BaseAddrLVal);
4513         // affs[i].len = sizeof(<Affinities[i].second>);
4514         LValue LenLVal = CGF.EmitLValueForField(
4515             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4516         CGF.EmitStoreOfScalar(Size, LenLVal);
4517         Idx = CGF.Builder.CreateNUWAdd(
4518             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4519         CGF.EmitStoreOfScalar(Idx, PosLVal);
4520       }
4521     }
4522     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4523     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4524     // naffins, kmp_task_affinity_info_t *affin_list);
4525     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4526     llvm::Value *GTid = getThreadID(CGF, Loc);
4527     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4528         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4529     // FIXME: Emit the function and ignore its result for now unless the
4530     // runtime function is properly implemented.
4531     (void)CGF.EmitRuntimeCall(
4532         OMPBuilder.getOrCreateRuntimeFunction(
4533             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4534         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4535   }
       // View the allocated task as a pointer to the per-task record; its first
       // field is the embedded kmp_task_t (TDBase).
4536   llvm::Value *NewTaskNewTaskTTy =
4537       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4538           NewTask, KmpTaskTWithPrivatesPtrTy);
4539   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4540                                                KmpTaskTWithPrivatesQTy);
4541   LValue TDBase =
4542       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4543   // Fill the data in the resulting kmp_task_t record.
4544   // Copy shareds if there are any.
4545   Address KmpTaskSharedsPtr = Address::invalid();
4546   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4547     KmpTaskSharedsPtr =
4548         Address(CGF.EmitLoadOfScalar(
4549                     CGF.EmitLValueForField(
4550                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4551                                            KmpTaskTShareds)),
4552                     Loc),
4553                 CGM.getNaturalTypeAlignment(SharedsTy));
4554     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4555     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4556     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4557   }
4558   // Emit initial values for private copies (if any).
4559   TaskResultTy Result;
4560   if (!Privates.empty()) {
4561     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4562                      SharedsTy, SharedsPtrTy, Data, Privates,
4563                      /*ForDup=*/false);
         // A task duplication function is emitted only for taskloop directives,
         // and only when there are lastprivates or checkInitIsRequired returns
         // true for the privates.
4564     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4565         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4566       Result.TaskDupFn = emitTaskDupFunction(
4567           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4568           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4569           /*WithLastIter=*/!Data.LastprivateVars.empty());
4570     }
4571   }
4572   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4573   enum { Priority = 0, Destructors = 1 };
4574   // Provide pointer to function with destructors for privates.
       // NOTE(review): the union declaration is taken from the Data1 field and is
       // also used to address the Data2 field below - presumably both fields have
       // the same union type; confirm against createKmpTaskTRecordDecl.
4575   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4576   const RecordDecl *KmpCmplrdataUD =
4577       (*FI)->getType()->getAsUnionType()->getDecl();
4578   if (NeedsCleanup) {
4579     llvm::Value *DestructorFn = emitDestructorsFunction(
4580         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4581         KmpTaskTWithPrivatesQTy);
4582     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4583     LValue DestructorsLV = CGF.EmitLValueForField(
4584         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4585     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4586                               DestructorFn, KmpRoutineEntryPtrTy),
4587                           DestructorsLV);
4588   }
4589   // Set priority.
4590   if (Data.Priority.getInt()) {
4591     LValue Data2LV = CGF.EmitLValueForField(
4592         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4593     LValue PriorityLV = CGF.EmitLValueForField(
4594         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4595     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4596   }
4597   Result.NewTask = NewTask;
4598   Result.TaskEntry = TaskEntry;
4599   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4600   Result.TDBase = TDBase;
4601   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4602   return Result;
4603 }
4604 
namespace {
/// Dependence kinds in the encoding passed to the runtime library.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Positions of the fields inside the kmp_depend_info record, in declaration
/// order.
enum RTLDependInfoFieldsTy { BaseAddr = 0, Len = 1, Flags = 2 };
} // namespace
4615 
4616 /// Translates internal dependency kind into the runtime kind.
4617 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4618   RTLDependenceKindTy DepKind;
4619   switch (K) {
4620   case OMPC_DEPEND_in:
4621     DepKind = DepIn;
4622     break;
4623   // Out and InOut dependencies must use the same code.
4624   case OMPC_DEPEND_out:
4625   case OMPC_DEPEND_inout:
4626     DepKind = DepInOut;
4627     break;
4628   case OMPC_DEPEND_mutexinoutset:
4629     DepKind = DepMutexInOutSet;
4630     break;
4631   case OMPC_DEPEND_source:
4632   case OMPC_DEPEND_sink:
4633   case OMPC_DEPEND_depobj:
4634   case OMPC_DEPEND_unknown:
4635     llvm_unreachable("Unknown task dependence type");
4636   }
4637   return DepKind;
4638 }
4639 
4640 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4641 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4642                            QualType &FlagsTy) {
4643   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4644   if (KmpDependInfoTy.isNull()) {
4645     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4646     KmpDependInfoRD->startDefinition();
4647     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4648     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4649     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4650     KmpDependInfoRD->completeDefinition();
4651     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4652   }
4653 }
4654 
4655 std::pair<llvm::Value *, LValue>
4656 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4657                                    SourceLocation Loc) {
4658   ASTContext &C = CGM.getContext();
4659   QualType FlagsTy;
4660   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4661   RecordDecl *KmpDependInfoRD =
4662       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4663   LValue Base = CGF.EmitLoadOfPointerLValue(
4664       DepobjLVal.getAddress(CGF),
4665       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4666   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4667   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4668           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4669   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4670                             Base.getTBAAInfo());
4671   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4672       Addr.getElementType(), Addr.getPointer(),
4673       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4674   LValue NumDepsBase = CGF.MakeAddrLValue(
4675       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4676       Base.getBaseInfo(), Base.getTBAAInfo());
4677   // NumDeps = deps[i].base_addr;
4678   LValue BaseAddrLVal = CGF.EmitLValueForField(
4679       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4680   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4681   return std::make_pair(NumDeps, Base);
4682 }
4683 
4684 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4685                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4686                            const OMPTaskDataTy::DependData &Data,
4687                            Address DependenciesArray) {
4688   CodeGenModule &CGM = CGF.CGM;
4689   ASTContext &C = CGM.getContext();
4690   QualType FlagsTy;
4691   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4692   RecordDecl *KmpDependInfoRD =
4693       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4694   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4695 
4696   OMPIteratorGeneratorScope IteratorScope(
4697       CGF, cast_or_null<OMPIteratorExpr>(
4698                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4699                                  : nullptr));
4700   for (const Expr *E : Data.DepExprs) {
4701     llvm::Value *Addr;
4702     llvm::Value *Size;
4703     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4704     LValue Base;
4705     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4706       Base = CGF.MakeAddrLValue(
4707           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4708     } else {
4709       LValue &PosLVal = *Pos.get<LValue *>();
4710       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4711       Base = CGF.MakeAddrLValue(
4712           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4713                                         DependenciesArray.getPointer(), Idx),
4714                   DependenciesArray.getAlignment()),
4715           KmpDependInfoTy);
4716     }
4717     // deps[i].base_addr = &<Dependencies[i].second>;
4718     LValue BaseAddrLVal = CGF.EmitLValueForField(
4719         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4720     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4721                           BaseAddrLVal);
4722     // deps[i].len = sizeof(<Dependencies[i].second>);
4723     LValue LenLVal = CGF.EmitLValueForField(
4724         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4725     CGF.EmitStoreOfScalar(Size, LenLVal);
4726     // deps[i].flags = <Dependencies[i].first>;
4727     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4728     LValue FlagsLVal = CGF.EmitLValueForField(
4729         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4730     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4731                           FlagsLVal);
4732     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4733       ++(*P);
4734     } else {
4735       LValue &PosLVal = *Pos.get<LValue *>();
4736       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4737       Idx = CGF.Builder.CreateNUWAdd(Idx,
4738                                      llvm::ConstantInt::get(Idx->getType(), 1));
4739       CGF.EmitStoreOfScalar(Idx, PosLVal);
4740     }
4741   }
4742 }
4743 
4744 static SmallVector<llvm::Value *, 4>
4745 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4746                         const OMPTaskDataTy::DependData &Data) {
4747   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4748          "Expected depobj dependecy kind.");
4749   SmallVector<llvm::Value *, 4> Sizes;
4750   SmallVector<LValue, 4> SizeLVals;
4751   ASTContext &C = CGF.getContext();
4752   QualType FlagsTy;
4753   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4754   RecordDecl *KmpDependInfoRD =
4755       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4756   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4757   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4758   {
4759     OMPIteratorGeneratorScope IteratorScope(
4760         CGF, cast_or_null<OMPIteratorExpr>(
4761                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4762                                    : nullptr));
4763     for (const Expr *E : Data.DepExprs) {
4764       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4765       LValue Base = CGF.EmitLoadOfPointerLValue(
4766           DepobjLVal.getAddress(CGF),
4767           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4768       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4769           Base.getAddress(CGF), KmpDependInfoPtrT);
4770       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4771                                 Base.getTBAAInfo());
4772       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4773           Addr.getElementType(), Addr.getPointer(),
4774           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4775       LValue NumDepsBase = CGF.MakeAddrLValue(
4776           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4777           Base.getBaseInfo(), Base.getTBAAInfo());
4778       // NumDeps = deps[i].base_addr;
4779       LValue BaseAddrLVal = CGF.EmitLValueForField(
4780           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4781       llvm::Value *NumDeps =
4782           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4783       LValue NumLVal = CGF.MakeAddrLValue(
4784           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4785           C.getUIntPtrType());
4786       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4787                               NumLVal.getAddress(CGF));
4788       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4789       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4790       CGF.EmitStoreOfScalar(Add, NumLVal);
4791       SizeLVals.push_back(NumLVal);
4792     }
4793   }
4794   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4795     llvm::Value *Size =
4796         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4797     Sizes.push_back(Size);
4798   }
4799   return Sizes;
4800 }
4801 
4802 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4803                                LValue PosLVal,
4804                                const OMPTaskDataTy::DependData &Data,
4805                                Address DependenciesArray) {
4806   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4807          "Expected depobj dependecy kind.");
4808   ASTContext &C = CGF.getContext();
4809   QualType FlagsTy;
4810   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4811   RecordDecl *KmpDependInfoRD =
4812       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4813   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4814   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4815   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4816   {
4817     OMPIteratorGeneratorScope IteratorScope(
4818         CGF, cast_or_null<OMPIteratorExpr>(
4819                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4820                                    : nullptr));
4821     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4822       const Expr *E = Data.DepExprs[I];
4823       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4824       LValue Base = CGF.EmitLoadOfPointerLValue(
4825           DepobjLVal.getAddress(CGF),
4826           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4827       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4828           Base.getAddress(CGF), KmpDependInfoPtrT);
4829       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4830                                 Base.getTBAAInfo());
4831 
4832       // Get number of elements in a single depobj.
4833       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4834           Addr.getElementType(), Addr.getPointer(),
4835           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4836       LValue NumDepsBase = CGF.MakeAddrLValue(
4837           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4838           Base.getBaseInfo(), Base.getTBAAInfo());
4839       // NumDeps = deps[i].base_addr;
4840       LValue BaseAddrLVal = CGF.EmitLValueForField(
4841           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4842       llvm::Value *NumDeps =
4843           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4844 
4845       // memcopy dependency data.
4846       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4847           ElSize,
4848           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4849       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4850       Address DepAddr =
4851           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4852                                         DependenciesArray.getPointer(), Pos),
4853                   DependenciesArray.getAlignment());
4854       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4855 
4856       // Increase pos.
4857       // pos += size;
4858       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4859       CGF.EmitStoreOfScalar(Add, PosLVal);
4860     }
4861   }
4862 }
4863 
4864 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4865     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4866     SourceLocation Loc) {
4867   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4868         return D.DepExprs.empty();
4869       }))
4870     return std::make_pair(nullptr, Address::invalid());
4871   // Process list of dependencies.
4872   ASTContext &C = CGM.getContext();
4873   Address DependenciesArray = Address::invalid();
4874   llvm::Value *NumOfElements = nullptr;
4875   unsigned NumDependencies = std::accumulate(
4876       Dependencies.begin(), Dependencies.end(), 0,
4877       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4878         return D.DepKind == OMPC_DEPEND_depobj
4879                    ? V
4880                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4881       });
4882   QualType FlagsTy;
4883   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4884   bool HasDepobjDeps = false;
4885   bool HasRegularWithIterators = false;
4886   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4887   llvm::Value *NumOfRegularWithIterators =
4888       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4889   // Calculate number of depobj dependecies and regular deps with the iterators.
4890   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4891     if (D.DepKind == OMPC_DEPEND_depobj) {
4892       SmallVector<llvm::Value *, 4> Sizes =
4893           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4894       for (llvm::Value *Size : Sizes) {
4895         NumOfDepobjElements =
4896             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4897       }
4898       HasDepobjDeps = true;
4899       continue;
4900     }
4901     // Include number of iterations, if any.
4902 
4903     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4904       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4905         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4906         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4907         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4908             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4909         NumOfRegularWithIterators =
4910             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4911       }
4912       HasRegularWithIterators = true;
4913       continue;
4914     }
4915   }
4916 
4917   QualType KmpDependInfoArrayTy;
4918   if (HasDepobjDeps || HasRegularWithIterators) {
4919     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4920                                            /*isSigned=*/false);
4921     if (HasDepobjDeps) {
4922       NumOfElements =
4923           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4924     }
4925     if (HasRegularWithIterators) {
4926       NumOfElements =
4927           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4928     }
4929     auto *OVE = new (C) OpaqueValueExpr(
4930         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4931         VK_PRValue);
4932     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4933                                                   RValue::get(NumOfElements));
4934     KmpDependInfoArrayTy =
4935         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4936                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4937     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4938     // Properly emit variable-sized array.
4939     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4940                                          ImplicitParamDecl::Other);
4941     CGF.EmitVarDecl(*PD);
4942     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4943     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4944                                               /*isSigned=*/false);
4945   } else {
4946     KmpDependInfoArrayTy = C.getConstantArrayType(
4947         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4948         ArrayType::Normal, /*IndexTypeQuals=*/0);
4949     DependenciesArray =
4950         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4951     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4952     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4953                                            /*isSigned=*/false);
4954   }
4955   unsigned Pos = 0;
4956   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4957     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4958         Dependencies[I].IteratorExpr)
4959       continue;
4960     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4961                    DependenciesArray);
4962   }
4963   // Copy regular dependecies with iterators.
4964   LValue PosLVal = CGF.MakeAddrLValue(
4965       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4966   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4967   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4968     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4969         !Dependencies[I].IteratorExpr)
4970       continue;
4971     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4972                    DependenciesArray);
4973   }
4974   // Copy final depobj arrays without iterators.
4975   if (HasDepobjDeps) {
4976     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4977       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4978         continue;
4979       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4980                          DependenciesArray);
4981     }
4982   }
4983   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4984       DependenciesArray, CGF.VoidPtrTy);
4985   return std::make_pair(NumOfElements, DependenciesArray);
4986 }
4987 
4988 Address CGOpenMPRuntime::emitDepobjDependClause(
4989     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4990     SourceLocation Loc) {
4991   if (Dependencies.DepExprs.empty())
4992     return Address::invalid();
4993   // Process list of dependencies.
4994   ASTContext &C = CGM.getContext();
4995   Address DependenciesArray = Address::invalid();
4996   unsigned NumDependencies = Dependencies.DepExprs.size();
4997   QualType FlagsTy;
4998   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4999   RecordDecl *KmpDependInfoRD =
5000       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5001 
5002   llvm::Value *Size;
5003   // Define type kmp_depend_info[<Dependencies.size()>];
5004   // For depobj reserve one extra element to store the number of elements.
5005   // It is required to handle depobj(x) update(in) construct.
5006   // kmp_depend_info[<Dependencies.size()>] deps;
5007   llvm::Value *NumDepsVal;
5008   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
5009   if (const auto *IE =
5010           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
5011     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
5012     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5013       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5014       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
5015       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
5016     }
5017     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
5018                                     NumDepsVal);
5019     CharUnits SizeInBytes =
5020         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
5021     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
5022     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
5023     NumDepsVal =
5024         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
5025   } else {
5026     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5027         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5028         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5029     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5030     Size = CGM.getSize(Sz.alignTo(Align));
5031     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5032   }
5033   // Need to allocate on the dynamic memory.
5034   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5035   // Use default allocator.
5036   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5037   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5038 
5039   llvm::Value *Addr =
5040       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5041                               CGM.getModule(), OMPRTL___kmpc_alloc),
5042                           Args, ".dep.arr.addr");
5043   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5044       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5045   DependenciesArray = Address(Addr, Align);
5046   // Write number of elements in the first element of array for depobj.
5047   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5048   // deps[i].base_addr = NumDependencies;
5049   LValue BaseAddrLVal = CGF.EmitLValueForField(
5050       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5051   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5052   llvm::PointerUnion<unsigned *, LValue *> Pos;
5053   unsigned Idx = 1;
5054   LValue PosLVal;
5055   if (Dependencies.IteratorExpr) {
5056     PosLVal = CGF.MakeAddrLValue(
5057         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5058         C.getSizeType());
5059     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5060                           /*IsInit=*/true);
5061     Pos = &PosLVal;
5062   } else {
5063     Pos = &Idx;
5064   }
5065   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5066   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5067       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5068   return DependenciesArray;
5069 }
5070 
5071 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5072                                         SourceLocation Loc) {
5073   ASTContext &C = CGM.getContext();
5074   QualType FlagsTy;
5075   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5076   LValue Base = CGF.EmitLoadOfPointerLValue(
5077       DepobjLVal.getAddress(CGF),
5078       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5079   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5080   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5081       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5082   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5083       Addr.getElementType(), Addr.getPointer(),
5084       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5085   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5086                                                                CGF.VoidPtrTy);
5087   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5088   // Use default allocator.
5089   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5090   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5091 
5092   // _kmpc_free(gtid, addr, nullptr);
5093   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5094                                 CGM.getModule(), OMPRTL___kmpc_free),
5095                             Args);
5096 }
5097 
5098 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5099                                        OpenMPDependClauseKind NewDepKind,
5100                                        SourceLocation Loc) {
5101   ASTContext &C = CGM.getContext();
5102   QualType FlagsTy;
5103   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5104   RecordDecl *KmpDependInfoRD =
5105       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5106   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5107   llvm::Value *NumDeps;
5108   LValue Base;
5109   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5110 
5111   Address Begin = Base.getAddress(CGF);
5112   // Cast from pointer to array type to pointer to single element.
5113   llvm::Value *End = CGF.Builder.CreateGEP(
5114       Begin.getElementType(), Begin.getPointer(), NumDeps);
5115   // The basic structure here is a while-do loop.
5116   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5117   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5118   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5119   CGF.EmitBlock(BodyBB);
5120   llvm::PHINode *ElementPHI =
5121       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5122   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5123   Begin = Address(ElementPHI, Begin.getAlignment());
5124   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5125                             Base.getTBAAInfo());
5126   // deps[i].flags = NewDepKind;
5127   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5128   LValue FlagsLVal = CGF.EmitLValueForField(
5129       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5130   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5131                         FlagsLVal);
5132 
5133   // Shift the address forward by one element.
5134   Address ElementNext =
5135       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5136   ElementPHI->addIncoming(ElementNext.getPointer(),
5137                           CGF.Builder.GetInsertBlock());
5138   llvm::Value *IsEmpty =
5139       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5140   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5141   // Done.
5142   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5143 }
5144 
5145 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5146                                    const OMPExecutableDirective &D,
5147                                    llvm::Function *TaskFunction,
5148                                    QualType SharedsTy, Address Shareds,
5149                                    const Expr *IfCond,
5150                                    const OMPTaskDataTy &Data) {
5151   if (!CGF.HaveInsertPoint())
5152     return;
5153 
5154   TaskResultTy Result =
5155       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5156   llvm::Value *NewTask = Result.NewTask;
5157   llvm::Function *TaskEntry = Result.TaskEntry;
5158   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5159   LValue TDBase = Result.TDBase;
5160   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5161   // Process list of dependences.
5162   Address DependenciesArray = Address::invalid();
5163   llvm::Value *NumOfElements;
5164   std::tie(NumOfElements, DependenciesArray) =
5165       emitDependClause(CGF, Data.Dependences, Loc);
5166 
5167   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5168   // libcall.
5169   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5170   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5171   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5172   // list is not empty
5173   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5174   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5175   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5176   llvm::Value *DepTaskArgs[7];
5177   if (!Data.Dependences.empty()) {
5178     DepTaskArgs[0] = UpLoc;
5179     DepTaskArgs[1] = ThreadID;
5180     DepTaskArgs[2] = NewTask;
5181     DepTaskArgs[3] = NumOfElements;
5182     DepTaskArgs[4] = DependenciesArray.getPointer();
5183     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5184     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5185   }
5186   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5187                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5188     if (!Data.Tied) {
5189       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5190       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5191       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5192     }
5193     if (!Data.Dependences.empty()) {
5194       CGF.EmitRuntimeCall(
5195           OMPBuilder.getOrCreateRuntimeFunction(
5196               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5197           DepTaskArgs);
5198     } else {
5199       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5200                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5201                           TaskArgs);
5202     }
5203     // Check if parent region is untied and build return for untied task;
5204     if (auto *Region =
5205             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5206       Region->emitUntiedSwitch(CGF);
5207   };
5208 
5209   llvm::Value *DepWaitTaskArgs[6];
5210   if (!Data.Dependences.empty()) {
5211     DepWaitTaskArgs[0] = UpLoc;
5212     DepWaitTaskArgs[1] = ThreadID;
5213     DepWaitTaskArgs[2] = NumOfElements;
5214     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5215     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5216     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5217   }
5218   auto &M = CGM.getModule();
5219   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5220                         TaskEntry, &Data, &DepWaitTaskArgs,
5221                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5222     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5223     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5224     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5225     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5226     // is specified.
5227     if (!Data.Dependences.empty())
5228       CGF.EmitRuntimeCall(
5229           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5230           DepWaitTaskArgs);
5231     // Call proxy_task_entry(gtid, new_task);
5232     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5233                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5234       Action.Enter(CGF);
5235       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5236       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5237                                                           OutlinedFnArgs);
5238     };
5239 
5240     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5241     // kmp_task_t *new_task);
5242     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5243     // kmp_task_t *new_task);
5244     RegionCodeGenTy RCG(CodeGen);
5245     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5246                               M, OMPRTL___kmpc_omp_task_begin_if0),
5247                           TaskArgs,
5248                           OMPBuilder.getOrCreateRuntimeFunction(
5249                               M, OMPRTL___kmpc_omp_task_complete_if0),
5250                           TaskArgs);
5251     RCG.setAction(Action);
5252     RCG(CGF);
5253   };
5254 
5255   if (IfCond) {
5256     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5257   } else {
5258     RegionCodeGenTy ThenRCG(ThenCodeGen);
5259     ThenRCG(CGF);
5260   }
5261 }
5262 
5263 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5264                                        const OMPLoopDirective &D,
5265                                        llvm::Function *TaskFunction,
5266                                        QualType SharedsTy, Address Shareds,
5267                                        const Expr *IfCond,
5268                                        const OMPTaskDataTy &Data) {
5269   if (!CGF.HaveInsertPoint())
5270     return;
5271   TaskResultTy Result =
5272       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5273   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5274   // libcall.
5275   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5276   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5277   // sched, kmp_uint64 grainsize, void *task_dup);
5278   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5279   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5280   llvm::Value *IfVal;
5281   if (IfCond) {
5282     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5283                                       /*isSigned=*/true);
5284   } else {
5285     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5286   }
5287 
5288   LValue LBLVal = CGF.EmitLValueForField(
5289       Result.TDBase,
5290       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5291   const auto *LBVar =
5292       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5293   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5294                        LBLVal.getQuals(),
5295                        /*IsInitializer=*/true);
5296   LValue UBLVal = CGF.EmitLValueForField(
5297       Result.TDBase,
5298       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5299   const auto *UBVar =
5300       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5301   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5302                        UBLVal.getQuals(),
5303                        /*IsInitializer=*/true);
5304   LValue StLVal = CGF.EmitLValueForField(
5305       Result.TDBase,
5306       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5307   const auto *StVar =
5308       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5309   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5310                        StLVal.getQuals(),
5311                        /*IsInitializer=*/true);
5312   // Store reductions address.
5313   LValue RedLVal = CGF.EmitLValueForField(
5314       Result.TDBase,
5315       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5316   if (Data.Reductions) {
5317     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5318   } else {
5319     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5320                                CGF.getContext().VoidPtrTy);
5321   }
5322   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5323   llvm::Value *TaskArgs[] = {
5324       UpLoc,
5325       ThreadID,
5326       Result.NewTask,
5327       IfVal,
5328       LBLVal.getPointer(CGF),
5329       UBLVal.getPointer(CGF),
5330       CGF.EmitLoadOfScalar(StLVal, Loc),
5331       llvm::ConstantInt::getSigned(
5332           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5333       llvm::ConstantInt::getSigned(
5334           CGF.IntTy, Data.Schedule.getPointer()
5335                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5336                          : NoSchedule),
5337       Data.Schedule.getPointer()
5338           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5339                                       /*isSigned=*/false)
5340           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5341       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5342                              Result.TaskDupFn, CGF.VoidPtrTy)
5343                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5344   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5345                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5346                       TaskArgs);
5347 }
5348 
5349 /// Emit reduction operation for each element of array (required for
5350 /// array sections) LHS op = RHS.
5351 /// \param Type Type of array.
5352 /// \param LHSVar Variable on the left side of the reduction operation
5353 /// (references element of array in original variable).
5354 /// \param RHSVar Variable on the right side of the reduction operation
5355 /// (references element of array in original variable).
5356 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5357 /// RHSVar.
5358 static void EmitOMPAggregateReduction(
5359     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5360     const VarDecl *RHSVar,
5361     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5362                                   const Expr *, const Expr *)> &RedOpGen,
5363     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5364     const Expr *UpExpr = nullptr) {
5365   // Perform element-by-element initialization.
5366   QualType ElementTy;
5367   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5368   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5369 
5370   // Drill down to the base element type on both arrays.
5371   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5372   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5373 
5374   llvm::Value *RHSBegin = RHSAddr.getPointer();
5375   llvm::Value *LHSBegin = LHSAddr.getPointer();
5376   // Cast from pointer to array type to pointer to single element.
5377   llvm::Value *LHSEnd =
5378       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5379   // The basic structure here is a while-do loop.
5380   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5381   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5382   llvm::Value *IsEmpty =
5383       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5384   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5385 
5386   // Enter the loop body, making that address the current address.
5387   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5388   CGF.EmitBlock(BodyBB);
5389 
5390   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5391 
5392   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5393       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5394   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5395   Address RHSElementCurrent =
5396       Address(RHSElementPHI,
5397               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5398 
5399   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5400       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5401   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5402   Address LHSElementCurrent =
5403       Address(LHSElementPHI,
5404               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5405 
5406   // Emit copy.
5407   CodeGenFunction::OMPPrivateScope Scope(CGF);
5408   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5409   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5410   Scope.Privatize();
5411   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5412   Scope.ForceCleanup();
5413 
5414   // Shift the address forward by one element.
5415   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5416       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5417       "omp.arraycpy.dest.element");
5418   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5419       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5420       "omp.arraycpy.src.element");
5421   // Check whether we've reached the end.
5422   llvm::Value *Done =
5423       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5424   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5425   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5426   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5427 
5428   // Done.
5429   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5430 }
5431 
5432 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5433 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5434 /// UDR combiner function.
5435 static void emitReductionCombiner(CodeGenFunction &CGF,
5436                                   const Expr *ReductionOp) {
5437   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5438     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5439       if (const auto *DRE =
5440               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5441         if (const auto *DRD =
5442                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5443           std::pair<llvm::Function *, llvm::Function *> Reduction =
5444               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5445           RValue Func = RValue::get(Reduction.first);
5446           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5447           CGF.EmitIgnoredExpr(ReductionOp);
5448           return;
5449         }
5450   CGF.EmitIgnoredExpr(ReductionOp);
5451 }
5452 
5453 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5454     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5455     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5456     ArrayRef<const Expr *> ReductionOps) {
5457   ASTContext &C = CGM.getContext();
5458 
5459   // void reduction_func(void *LHSArg, void *RHSArg);
5460   FunctionArgList Args;
5461   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5462                            ImplicitParamDecl::Other);
5463   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5464                            ImplicitParamDecl::Other);
5465   Args.push_back(&LHSArg);
5466   Args.push_back(&RHSArg);
5467   const auto &CGFI =
5468       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5469   std::string Name = getName({"omp", "reduction", "reduction_func"});
5470   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5471                                     llvm::GlobalValue::InternalLinkage, Name,
5472                                     &CGM.getModule());
5473   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5474   Fn->setDoesNotRecurse();
5475   CodeGenFunction CGF(CGM);
5476   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5477 
5478   // Dst = (void*[n])(LHSArg);
5479   // Src = (void*[n])(RHSArg);
5480   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5481       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5482       ArgsType), CGF.getPointerAlign());
5483   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5484       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5485       ArgsType), CGF.getPointerAlign());
5486 
5487   //  ...
5488   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5489   //  ...
5490   CodeGenFunction::OMPPrivateScope Scope(CGF);
5491   auto IPriv = Privates.begin();
5492   unsigned Idx = 0;
5493   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5494     const auto *RHSVar =
5495         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5496     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5497       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5498     });
5499     const auto *LHSVar =
5500         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5501     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5502       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5503     });
5504     QualType PrivTy = (*IPriv)->getType();
5505     if (PrivTy->isVariablyModifiedType()) {
5506       // Get array size and emit VLA type.
5507       ++Idx;
5508       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5509       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5510       const VariableArrayType *VLA =
5511           CGF.getContext().getAsVariableArrayType(PrivTy);
5512       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5513       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5514           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5515       CGF.EmitVariablyModifiedType(PrivTy);
5516     }
5517   }
5518   Scope.Privatize();
5519   IPriv = Privates.begin();
5520   auto ILHS = LHSExprs.begin();
5521   auto IRHS = RHSExprs.begin();
5522   for (const Expr *E : ReductionOps) {
5523     if ((*IPriv)->getType()->isArrayType()) {
5524       // Emit reduction for array section.
5525       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5526       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5527       EmitOMPAggregateReduction(
5528           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5529           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5530             emitReductionCombiner(CGF, E);
5531           });
5532     } else {
5533       // Emit reduction for array subscript or single variable.
5534       emitReductionCombiner(CGF, E);
5535     }
5536     ++IPriv;
5537     ++ILHS;
5538     ++IRHS;
5539   }
5540   Scope.ForceCleanup();
5541   CGF.FinishFunction();
5542   return Fn;
5543 }
5544 
5545 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5546                                                   const Expr *ReductionOp,
5547                                                   const Expr *PrivateRef,
5548                                                   const DeclRefExpr *LHS,
5549                                                   const DeclRefExpr *RHS) {
5550   if (PrivateRef->getType()->isArrayType()) {
5551     // Emit reduction for array section.
5552     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5553     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5554     EmitOMPAggregateReduction(
5555         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5556         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5557           emitReductionCombiner(CGF, ReductionOp);
5558         });
5559   } else {
5560     // Emit reduction for array subscript or single variable.
5561     emitReductionCombiner(CGF, ReductionOp);
5562   }
5563 }
5564 
/// Emits the code that combines the reduction items at the end of a region.
/// Does nothing when \p CGF has no insertion point. When
/// Options.SimpleReduction is set, only the combiner operations are emitted
/// inline. Otherwise a list of pointers to the RHS items is built, an outlined
/// reduce function is generated, __kmpc_reduce{_nowait} is called and its
/// result selects between a plain (case 1) and an atomic (case 2) combining
/// path; see the pseudo code in the body.
/// \param CGF Function in which the code is emitted.
/// \param Loc Source location used for the runtime calls and outlined code.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs References to the LHS variables of the reduction ops.
/// \param RHSExprs References to the RHS variables of the reduction ops.
/// \param ReductionOps Reduction operations. The four arrays are walked in
/// lock step, one entry per reduction item.
/// \param Options Only the WithNowait and SimpleReduction fields are read.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls at all: just emit every combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list has one slot per reduction item plus one extra slot for every
  // variably modified item.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in the list; it runs ahead of I whenever a size slot has
  // been written.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // Note: this local Size (the VLA element count, stored as a pointer
      // value) shadows the slot counter declared above.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // <n> is the number of reduction items (RHSExprs.size()), while
  // sizeof(RedList) also covers the extra VLA size slots.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Body of case 1: the plain (non-atomic) combiners for all items.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action is constructed with the end-reduce runtime function and
  // EndArgs in its last two arguments; no entry callee is supplied.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Body of case 2: each item is combined either through a simple atomic
  // update (when the operation has the form 'x = <update>') or inside a
  // critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO_Comma is only the initial value; it is replaced below when the
      // update expression turns out to be a binary operator.
      BinaryOperatorKind BO = BO_Comma;
      // Note: the pointer named BO in this if-statement shadows the
      // BinaryOperatorKind BO above for the extent of the statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback below computes the updated value from a given value
          // of X: the value is stored to a temporary that temporarily
          // replaces VD, and the update expression is evaluated against it.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per element.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    // The region is run inside this branch so that Action, a local of the
    // branch, is still alive while it is used.
    AtomicRCG(CGF);
  } else {
    // With nowait no end-reduce action is attached for the atomic path.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5871 
5872 /// Generates unique name for artificial threadprivate variables.
5873 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5874 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5875                                       const Expr *Ref) {
5876   SmallString<256> Buffer;
5877   llvm::raw_svector_ostream Out(Buffer);
5878   const clang::DeclRefExpr *DE;
5879   const VarDecl *D = ::getBaseDecl(Ref, DE);
5880   if (!D)
5881     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5882   D = D->getCanonicalDecl();
5883   std::string Name = CGM.getOpenMPRuntime().getName(
5884       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5885   Out << Prefix << Name << "_"
5886       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5887   return std::string(Out.str());
5888 }
5889 
5890 /// Emits reduction initializer function:
5891 /// \code
5892 /// void @.red_init(void* %arg, void* %orig) {
5893 /// %0 = bitcast void* %arg to <type>*
5894 /// store <type> <init>, <type>* %0
5895 /// ret void
5896 /// }
5897 /// \endcode
5898 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5899                                            SourceLocation Loc,
5900                                            ReductionCodeGen &RCG, unsigned N) {
5901   ASTContext &C = CGM.getContext();
5902   QualType VoidPtrTy = C.VoidPtrTy;
5903   VoidPtrTy.addRestrict();
5904   FunctionArgList Args;
5905   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5906                           ImplicitParamDecl::Other);
5907   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5908                               ImplicitParamDecl::Other);
5909   Args.emplace_back(&Param);
5910   Args.emplace_back(&ParamOrig);
5911   const auto &FnInfo =
5912       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5913   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5914   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5915   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5916                                     Name, &CGM.getModule());
5917   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5918   Fn->setDoesNotRecurse();
5919   CodeGenFunction CGF(CGM);
5920   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5921   Address PrivateAddr = CGF.EmitLoadOfPointer(
5922       CGF.GetAddrOfLocalVar(&Param),
5923       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5924   llvm::Value *Size = nullptr;
5925   // If the size of the reduction item is non-constant, load it from global
5926   // threadprivate variable.
5927   if (RCG.getSizes(N).second) {
5928     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5929         CGF, CGM.getContext().getSizeType(),
5930         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5931     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5932                                 CGM.getContext().getSizeType(), Loc);
5933   }
5934   RCG.emitAggregateType(CGF, N, Size);
5935   LValue OrigLVal;
5936   // If initializer uses initializer from declare reduction construct, emit a
5937   // pointer to the address of the original reduction item (reuired by reduction
5938   // initializer)
5939   if (RCG.usesReductionInitializer(N)) {
5940     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5941     SharedAddr = CGF.EmitLoadOfPointer(
5942         SharedAddr,
5943         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5944     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5945   } else {
5946     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5947         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5948         CGM.getContext().VoidTy);
5949   }
5950   // Emit the initializer:
5951   // %0 = bitcast void* %arg to <type>*
5952   // store <type> <init>, <type>* %0
5953   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5954                          [](CodeGenFunction &) { return false; });
5955   CGF.FinishFunction();
5956   return Fn;
5957 }
5958 
5959 /// Emits reduction combiner function:
5960 /// \code
5961 /// void @.red_comb(void* %arg0, void* %arg1) {
5962 /// %lhs = bitcast void* %arg0 to <type>*
5963 /// %rhs = bitcast void* %arg1 to <type>*
5964 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5965 /// store <type> %2, <type>* %lhs
5966 /// ret void
5967 /// }
5968 /// \endcode
5969 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5970                                            SourceLocation Loc,
5971                                            ReductionCodeGen &RCG, unsigned N,
5972                                            const Expr *ReductionOp,
5973                                            const Expr *LHS, const Expr *RHS,
5974                                            const Expr *PrivateRef) {
5975   ASTContext &C = CGM.getContext();
5976   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5977   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5978   FunctionArgList Args;
5979   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5980                                C.VoidPtrTy, ImplicitParamDecl::Other);
5981   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5982                             ImplicitParamDecl::Other);
5983   Args.emplace_back(&ParamInOut);
5984   Args.emplace_back(&ParamIn);
5985   const auto &FnInfo =
5986       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5987   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5988   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5989   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5990                                     Name, &CGM.getModule());
5991   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5992   Fn->setDoesNotRecurse();
5993   CodeGenFunction CGF(CGM);
5994   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5995   llvm::Value *Size = nullptr;
5996   // If the size of the reduction item is non-constant, load it from global
5997   // threadprivate variable.
5998   if (RCG.getSizes(N).second) {
5999     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6000         CGF, CGM.getContext().getSizeType(),
6001         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6002     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6003                                 CGM.getContext().getSizeType(), Loc);
6004   }
6005   RCG.emitAggregateType(CGF, N, Size);
6006   // Remap lhs and rhs variables to the addresses of the function arguments.
6007   // %lhs = bitcast void* %arg0 to <type>*
6008   // %rhs = bitcast void* %arg1 to <type>*
6009   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6010   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6011     // Pull out the pointer to the variable.
6012     Address PtrAddr = CGF.EmitLoadOfPointer(
6013         CGF.GetAddrOfLocalVar(&ParamInOut),
6014         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6015     return CGF.Builder.CreateElementBitCast(
6016         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6017   });
6018   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6019     // Pull out the pointer to the variable.
6020     Address PtrAddr = CGF.EmitLoadOfPointer(
6021         CGF.GetAddrOfLocalVar(&ParamIn),
6022         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6023     return CGF.Builder.CreateElementBitCast(
6024         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6025   });
6026   PrivateScope.Privatize();
6027   // Emit the combiner body:
6028   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6029   // store <type> %2, <type>* %lhs
6030   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6031       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6032       cast<DeclRefExpr>(RHS));
6033   CGF.FinishFunction();
6034   return Fn;
6035 }
6036 
6037 /// Emits reduction finalizer function:
6038 /// \code
6039 /// void @.red_fini(void* %arg) {
6040 /// %0 = bitcast void* %arg to <type>*
6041 /// <destroy>(<type>* %0)
6042 /// ret void
6043 /// }
6044 /// \endcode
6045 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6046                                            SourceLocation Loc,
6047                                            ReductionCodeGen &RCG, unsigned N) {
6048   if (!RCG.needCleanups(N))
6049     return nullptr;
6050   ASTContext &C = CGM.getContext();
6051   FunctionArgList Args;
6052   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6053                           ImplicitParamDecl::Other);
6054   Args.emplace_back(&Param);
6055   const auto &FnInfo =
6056       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6057   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6058   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6059   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6060                                     Name, &CGM.getModule());
6061   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6062   Fn->setDoesNotRecurse();
6063   CodeGenFunction CGF(CGM);
6064   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6065   Address PrivateAddr = CGF.EmitLoadOfPointer(
6066       CGF.GetAddrOfLocalVar(&Param),
6067       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6068   llvm::Value *Size = nullptr;
6069   // If the size of the reduction item is non-constant, load it from global
6070   // threadprivate variable.
6071   if (RCG.getSizes(N).second) {
6072     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6073         CGF, CGM.getContext().getSizeType(),
6074         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6075     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6076                                 CGM.getContext().getSizeType(), Loc);
6077   }
6078   RCG.emitAggregateType(CGF, N, Size);
6079   // Emit the finalizer body:
6080   // <destroy>(<type>* %0)
6081   RCG.emitCleanups(CGF, N, PrivateAddr);
6082   CGF.FinishFunction(Loc);
6083   return Fn;
6084 }
6085 
/// Emits the setup code for task reductions: fills a local array of
/// kmp_taskred_input_t descriptors (one per entry of \p Data.ReductionVars)
/// and passes it either to __kmpc_taskred_modifier_init (when
/// \p Data.IsReductionWithTaskMod is set) or to __kmpc_taskred_init.
/// \returns the result of that runtime call, or nullptr if there is no insert
/// point or there are no reduction items.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // The fields are added in the same order as in the struct sketched above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size of the item in chars>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // emitReduceFiniFunction returns nullptr when no finalizer is needed; a
    // null pointer is stored in that case.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6214 
6215 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6216                                             SourceLocation Loc,
6217                                             bool IsWorksharingReduction) {
6218   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6219   // is_ws, int num, void *data);
6220   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6221   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6222                                                 CGM.IntTy, /*isSigned=*/true);
6223   llvm::Value *Args[] = {IdentTLoc, GTid,
6224                          llvm::ConstantInt::get(CGM.IntTy,
6225                                                 IsWorksharingReduction ? 1 : 0,
6226                                                 /*isSigned=*/true)};
6227   (void)CGF.EmitRuntimeCall(
6228       OMPBuilder.getOrCreateRuntimeFunction(
6229           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6230       Args);
6231 }
6232 
6233 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6234                                               SourceLocation Loc,
6235                                               ReductionCodeGen &RCG,
6236                                               unsigned N) {
6237   auto Sizes = RCG.getSizes(N);
6238   // Emit threadprivate global variable if the type is non-constant
6239   // (Sizes.second = nullptr).
6240   if (Sizes.second) {
6241     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6242                                                      /*isSigned=*/false);
6243     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6244         CGF, CGM.getContext().getSizeType(),
6245         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6246     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6247   }
6248 }
6249 
6250 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6251                                               SourceLocation Loc,
6252                                               llvm::Value *ReductionsPtr,
6253                                               LValue SharedLVal) {
6254   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6255   // *d);
6256   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6257                                                    CGM.IntTy,
6258                                                    /*isSigned=*/true),
6259                          ReductionsPtr,
6260                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6261                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6262   return Address(
6263       CGF.EmitRuntimeCall(
6264           OMPBuilder.getOrCreateRuntimeFunction(
6265               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6266           Args),
6267       SharedLVal.getAlignment());
6268 }
6269 
6270 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6271                                        const OMPTaskDataTy &Data) {
6272   if (!CGF.HaveInsertPoint())
6273     return;
6274 
6275   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6276     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6277     OMPBuilder.createTaskwait(CGF.Builder);
6278   } else {
6279     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6280     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6281     auto &M = CGM.getModule();
6282     Address DependenciesArray = Address::invalid();
6283     llvm::Value *NumOfElements;
6284     std::tie(NumOfElements, DependenciesArray) =
6285         emitDependClause(CGF, Data.Dependences, Loc);
6286     llvm::Value *DepWaitTaskArgs[6];
6287     if (!Data.Dependences.empty()) {
6288       DepWaitTaskArgs[0] = UpLoc;
6289       DepWaitTaskArgs[1] = ThreadID;
6290       DepWaitTaskArgs[2] = NumOfElements;
6291       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6292       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6293       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6294 
6295       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6296 
6297       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6298       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6299       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6300       // is specified.
6301       CGF.EmitRuntimeCall(
6302           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6303           DepWaitTaskArgs);
6304 
6305     } else {
6306 
6307       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6308       // global_tid);
6309       llvm::Value *Args[] = {UpLoc, ThreadID};
6310       // Ignore return result until untied tasks are supported.
6311       CGF.EmitRuntimeCall(
6312           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6313           Args);
6314     }
6315   }
6316 
6317   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6318     Region->emitUntiedSwitch(CGF);
6319 }
6320 
6321 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6322                                            OpenMPDirectiveKind InnerKind,
6323                                            const RegionCodeGenTy &CodeGen,
6324                                            bool HasCancel) {
6325   if (!CGF.HaveInsertPoint())
6326     return;
6327   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6328                                  InnerKind != OMPD_critical &&
6329                                      InnerKind != OMPD_master &&
6330                                      InnerKind != OMPD_masked);
6331   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6332 }
6333 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls emitted in this
/// file.
enum RTCancelKind {
  /// Initial value in getCancellationKind; no directive is mapped to it there.
  CancelNoreq = 0,
  /// Used for OMPD_parallel.
  CancelParallel = 1,
  /// Used for OMPD_for.
  CancelLoop = 2,
  /// Used for OMPD_sections.
  CancelSections = 3,
  /// Used for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6343 
6344 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6345   RTCancelKind CancelKind = CancelNoreq;
6346   if (CancelRegion == OMPD_parallel)
6347     CancelKind = CancelParallel;
6348   else if (CancelRegion == OMPD_for)
6349     CancelKind = CancelLoop;
6350   else if (CancelRegion == OMPD_sections)
6351     CancelKind = CancelSections;
6352   else {
6353     assert(CancelRegion == OMPD_taskgroup);
6354     CancelKind = CancelTaskgroup;
6355   }
6356   return CancelKind;
6357 }
6358 
6359 void CGOpenMPRuntime::emitCancellationPointCall(
6360     CodeGenFunction &CGF, SourceLocation Loc,
6361     OpenMPDirectiveKind CancelRegion) {
6362   if (!CGF.HaveInsertPoint())
6363     return;
6364   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6365   // global_tid, kmp_int32 cncl_kind);
6366   if (auto *OMPRegionInfo =
6367           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6368     // For 'cancellation point taskgroup', the task region info may not have a
6369     // cancel. This may instead happen in another adjacent task.
6370     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6371       llvm::Value *Args[] = {
6372           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6373           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6374       // Ignore return result until untied tasks are supported.
6375       llvm::Value *Result = CGF.EmitRuntimeCall(
6376           OMPBuilder.getOrCreateRuntimeFunction(
6377               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6378           Args);
6379       // if (__kmpc_cancellationpoint()) {
6380       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6381       //   exit from construct;
6382       // }
6383       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6384       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6385       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6386       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6387       CGF.EmitBlock(ExitBB);
6388       if (CancelRegion == OMPD_parallel)
6389         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6390       // exit from construct;
6391       CodeGenFunction::JumpDest CancelDest =
6392           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6393       CGF.EmitBranchThroughCleanup(CancelDest);
6394       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6395     }
6396   }
6397 }
6398 
6399 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6400                                      const Expr *IfCond,
6401                                      OpenMPDirectiveKind CancelRegion) {
6402   if (!CGF.HaveInsertPoint())
6403     return;
6404   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6405   // kmp_int32 cncl_kind);
6406   auto &M = CGM.getModule();
6407   if (auto *OMPRegionInfo =
6408           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6409     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6410                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6411       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6412       llvm::Value *Args[] = {
6413           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6414           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6415       // Ignore return result until untied tasks are supported.
6416       llvm::Value *Result = CGF.EmitRuntimeCall(
6417           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6418       // if (__kmpc_cancel()) {
6419       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6420       //   exit from construct;
6421       // }
6422       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6423       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6424       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6425       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6426       CGF.EmitBlock(ExitBB);
6427       if (CancelRegion == OMPD_parallel)
6428         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6429       // exit from construct;
6430       CodeGenFunction::JumpDest CancelDest =
6431           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6432       CGF.EmitBranchThroughCleanup(CancelDest);
6433       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6434     };
6435     if (IfCond) {
6436       emitIfClause(CGF, IfCond, ThenGen,
6437                    [](CodeGenFunction &, PrePostActionTy &) {});
6438     } else {
6439       RegionCodeGenTy ThenRCG(ThenGen);
6440       ThenRCG(CGF);
6441     }
6442   }
6443 }
6444 
6445 namespace {
6446 /// Cleanup action for uses_allocators support.
6447 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6448   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6449 
6450 public:
6451   OMPUsesAllocatorsActionTy(
6452       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6453       : Allocators(Allocators) {}
6454   void Enter(CodeGenFunction &CGF) override {
6455     if (!CGF.HaveInsertPoint())
6456       return;
6457     for (const auto &AllocatorData : Allocators) {
6458       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6459           CGF, AllocatorData.first, AllocatorData.second);
6460     }
6461   }
6462   void Exit(CodeGenFunction &CGF) override {
6463     if (!CGF.HaveInsertPoint())
6464       return;
6465     for (const auto &AllocatorData : Allocators) {
6466       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6467                                                         AllocatorData.first);
6468     }
6469   }
6470 };
6471 } // namespace
6472 
6473 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6474     const OMPExecutableDirective &D, StringRef ParentName,
6475     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6476     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6477   assert(!ParentName.empty() && "Invalid target region parent name!");
6478   HasEmittedTargetRegion = true;
6479   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6480   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6481     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6482       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6483       if (!D.AllocatorTraits)
6484         continue;
6485       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6486     }
6487   }
6488   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6489   CodeGen.setAction(UsesAllocatorAction);
6490   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6491                                    IsOffloadEntry, CodeGen);
6492 }
6493 
6494 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6495                                              const Expr *Allocator,
6496                                              const Expr *AllocatorTraits) {
6497   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6498   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6499   // Use default memspace handle.
6500   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6501   llvm::Value *NumTraits = llvm::ConstantInt::get(
6502       CGF.IntTy, cast<ConstantArrayType>(
6503                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6504                      ->getSize()
6505                      .getLimitedValue());
6506   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6507   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6508       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6509   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6510                                            AllocatorTraitsLVal.getBaseInfo(),
6511                                            AllocatorTraitsLVal.getTBAAInfo());
6512   llvm::Value *Traits =
6513       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6514 
6515   llvm::Value *AllocatorVal =
6516       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6517                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6518                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6519   // Store to allocator.
6520   CGF.EmitVarDecl(*cast<VarDecl>(
6521       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6522   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6523   AllocatorVal =
6524       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6525                                Allocator->getType(), Allocator->getExprLoc());
6526   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6527 }
6528 
6529 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6530                                              const Expr *Allocator) {
6531   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6532   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6533   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6534   llvm::Value *AllocatorVal =
6535       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6536   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6537                                           CGF.getContext().VoidPtrTy,
6538                                           Allocator->getExprLoc());
6539   (void)CGF.EmitRuntimeCall(
6540       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6541                                             OMPRTL___kmpc_destroy_allocator),
6542       {ThreadId, AllocatorVal});
6543 }
6544 
/// Outlines the 'target' captured statement of \p D into \p OutlinedFn under a
/// name derived from the device ID, file ID, \p ParentName and line of the
/// directive. If \p IsOffloadEntry is set, it additionally sets
/// \p OutlinedFnID, registers the target region entry and attaches the
/// default num_teams/thread_limit attributes and target attributes to the
/// outlined function.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // The device and file IDs are printed in hexadecimal, the line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a separate constant i8 global initialized to zero.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  // The attributes are only added when the reported default value is positive;
  // the expressions returned by the two queries are not used here.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6629 
6630 /// Checks if the expression is constant or does not have non-trivial function
6631 /// calls.
6632 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6633   // We can skip constant expressions.
6634   // We can skip expressions with trivial calls or simple expressions.
6635   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6636           !E->hasNonTrivialCall(Ctx)) &&
6637          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6638 }
6639 
/// Looks through containers and compound statements of \p Body for the single
/// statement that cannot be ignored. Returns that statement, or nullptr if a
/// compound statement contains more than one such statement or none at all.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the current candidate is itself a compound statement.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (see isTrivial) are skipped.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      // A declaration statement is skipped only if every declaration in it is
      // of one of the kinds listed below, or is a variable that has global
      // storage or is not used.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Strip containers from the candidate; if the result is again a compound
    // statement, the outer loop analyzes it in turn.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6681 
6682 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6683     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6684     int32_t &DefaultVal) {
6685 
6686   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6687   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6688          "Expected target-based executable directive.");
6689   switch (DirectiveKind) {
6690   case OMPD_target: {
6691     const auto *CS = D.getInnermostCapturedStmt();
6692     const auto *Body =
6693         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6694     const Stmt *ChildStmt =
6695         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6696     if (const auto *NestedDir =
6697             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6698       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6699         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6700           const Expr *NumTeams =
6701               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6702           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6703             if (auto Constant =
6704                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6705               DefaultVal = Constant->getExtValue();
6706           return NumTeams;
6707         }
6708         DefaultVal = 0;
6709         return nullptr;
6710       }
6711       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6712           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6713         DefaultVal = 1;
6714         return nullptr;
6715       }
6716       DefaultVal = 1;
6717       return nullptr;
6718     }
6719     // A value of -1 is used to check if we need to emit no teams region
6720     DefaultVal = -1;
6721     return nullptr;
6722   }
6723   case OMPD_target_teams:
6724   case OMPD_target_teams_distribute:
6725   case OMPD_target_teams_distribute_simd:
6726   case OMPD_target_teams_distribute_parallel_for:
6727   case OMPD_target_teams_distribute_parallel_for_simd: {
6728     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6729       const Expr *NumTeams =
6730           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6731       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6732         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6733           DefaultVal = Constant->getExtValue();
6734       return NumTeams;
6735     }
6736     DefaultVal = 0;
6737     return nullptr;
6738   }
6739   case OMPD_target_parallel:
6740   case OMPD_target_parallel_for:
6741   case OMPD_target_parallel_for_simd:
6742   case OMPD_target_simd:
6743     DefaultVal = 1;
6744     return nullptr;
6745   case OMPD_parallel:
6746   case OMPD_for:
6747   case OMPD_parallel_for:
6748   case OMPD_parallel_master:
6749   case OMPD_parallel_sections:
6750   case OMPD_for_simd:
6751   case OMPD_parallel_for_simd:
6752   case OMPD_cancel:
6753   case OMPD_cancellation_point:
6754   case OMPD_ordered:
6755   case OMPD_threadprivate:
6756   case OMPD_allocate:
6757   case OMPD_task:
6758   case OMPD_simd:
6759   case OMPD_tile:
6760   case OMPD_unroll:
6761   case OMPD_sections:
6762   case OMPD_section:
6763   case OMPD_single:
6764   case OMPD_master:
6765   case OMPD_critical:
6766   case OMPD_taskyield:
6767   case OMPD_barrier:
6768   case OMPD_taskwait:
6769   case OMPD_taskgroup:
6770   case OMPD_atomic:
6771   case OMPD_flush:
6772   case OMPD_depobj:
6773   case OMPD_scan:
6774   case OMPD_teams:
6775   case OMPD_target_data:
6776   case OMPD_target_exit_data:
6777   case OMPD_target_enter_data:
6778   case OMPD_distribute:
6779   case OMPD_distribute_simd:
6780   case OMPD_distribute_parallel_for:
6781   case OMPD_distribute_parallel_for_simd:
6782   case OMPD_teams_distribute:
6783   case OMPD_teams_distribute_simd:
6784   case OMPD_teams_distribute_parallel_for:
6785   case OMPD_teams_distribute_parallel_for_simd:
6786   case OMPD_target_update:
6787   case OMPD_declare_simd:
6788   case OMPD_declare_variant:
6789   case OMPD_begin_declare_variant:
6790   case OMPD_end_declare_variant:
6791   case OMPD_declare_target:
6792   case OMPD_end_declare_target:
6793   case OMPD_declare_reduction:
6794   case OMPD_declare_mapper:
6795   case OMPD_taskloop:
6796   case OMPD_taskloop_simd:
6797   case OMPD_master_taskloop:
6798   case OMPD_master_taskloop_simd:
6799   case OMPD_parallel_master_taskloop:
6800   case OMPD_parallel_master_taskloop_simd:
6801   case OMPD_requires:
6802   case OMPD_metadirective:
6803   case OMPD_unknown:
6804     break;
6805   default:
6806     break;
6807   }
6808   llvm_unreachable("Unexpected directive kind.");
6809 }
6810 
6811 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6812     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6813   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6814          "Clauses associated with the teams directive expected to be emitted "
6815          "only for the host!");
6816   CGBuilderTy &Bld = CGF.Builder;
6817   int32_t DefaultNT = -1;
6818   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6819   if (NumTeams != nullptr) {
6820     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6821 
6822     switch (DirectiveKind) {
6823     case OMPD_target: {
6824       const auto *CS = D.getInnermostCapturedStmt();
6825       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6826       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6827       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6828                                                   /*IgnoreResultAssign*/ true);
6829       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6830                              /*isSigned=*/true);
6831     }
6832     case OMPD_target_teams:
6833     case OMPD_target_teams_distribute:
6834     case OMPD_target_teams_distribute_simd:
6835     case OMPD_target_teams_distribute_parallel_for:
6836     case OMPD_target_teams_distribute_parallel_for_simd: {
6837       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6838       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6839                                                   /*IgnoreResultAssign*/ true);
6840       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6841                              /*isSigned=*/true);
6842     }
6843     default:
6844       break;
6845     }
6846   } else if (DefaultNT == -1) {
6847     return nullptr;
6848   }
6849 
6850   return Bld.getInt32(DefaultNT);
6851 }
6852 
/// Emits the number of threads implied by the statement captured in \p CS.
///
/// The single relevant child of the captured statement (see
/// CGOpenMPRuntime::getSingleCompoundChild) decides the result:
///  - a parallel directive: the value is derived from its 'if' and
///    'num_threads' clauses and clamped by \p DefaultThreadLimitVal when that
///    is non-null;
///  - a simd directive: the constant 1;
///  - any other directive: \p DefaultThreadLimitVal, which may be nullptr;
///  - no directive: \p DefaultThreadLimitVal, or the constant 0 if it is null.
///
/// \param CGF Code generation state used to emit the clause expressions.
/// \param CS Captured statement whose body is inspected.
/// \param DefaultThreadLimitVal Thread limit computed so far as an i32 value,
/// or nullptr if there is none.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the first 'if' clause that has no name modifier or carries the
        // 'parallel' modifier.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: a false condition yields exactly
            // one thread; a true condition needs no runtime check.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Condition is only known at run time: emit the clause's pre-init
            // declarations, then the condition itself.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Declarations marked OMPCaptureNoInitAttr only get their
                  // alloca and cleanups emitted.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init declarations before its expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the thread limit: select the smaller of the two values
        // using an unsigned comparison.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread limit, or to 0 when
        // there is none.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        // A false run-time condition yields one thread.
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // Some other directive: nothing to add; this may return nullptr.
    return DefaultThreadLimitVal;
  }
  // No single nested directive found.
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6944 
6945 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6946     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6947     int32_t &DefaultVal) {
6948   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6949   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6950          "Expected target-based executable directive.");
6951 
6952   switch (DirectiveKind) {
6953   case OMPD_target:
6954     // Teams have no clause thread_limit
6955     return nullptr;
6956   case OMPD_target_teams:
6957   case OMPD_target_teams_distribute:
6958     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6959       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6960       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6961       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6962         if (auto Constant =
6963                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6964           DefaultVal = Constant->getExtValue();
6965       return ThreadLimit;
6966     }
6967     return nullptr;
6968   case OMPD_target_parallel:
6969   case OMPD_target_parallel_for:
6970   case OMPD_target_parallel_for_simd:
6971   case OMPD_target_teams_distribute_parallel_for:
6972   case OMPD_target_teams_distribute_parallel_for_simd: {
6973     Expr *ThreadLimit = nullptr;
6974     Expr *NumThreads = nullptr;
6975     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6976       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6977       ThreadLimit = ThreadLimitClause->getThreadLimit();
6978       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6979         if (auto Constant =
6980                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6981           DefaultVal = Constant->getExtValue();
6982     }
6983     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6984       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6985       NumThreads = NumThreadsClause->getNumThreads();
6986       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6987         if (auto Constant =
6988                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6989           if (Constant->getExtValue() < DefaultVal) {
6990             DefaultVal = Constant->getExtValue();
6991             ThreadLimit = NumThreads;
6992           }
6993         }
6994       }
6995     }
6996     return ThreadLimit;
6997   }
6998   case OMPD_target_teams_distribute_simd:
6999   case OMPD_target_simd:
7000     DefaultVal = 1;
7001     return nullptr;
7002   case OMPD_parallel:
7003   case OMPD_for:
7004   case OMPD_parallel_for:
7005   case OMPD_parallel_master:
7006   case OMPD_parallel_sections:
7007   case OMPD_for_simd:
7008   case OMPD_parallel_for_simd:
7009   case OMPD_cancel:
7010   case OMPD_cancellation_point:
7011   case OMPD_ordered:
7012   case OMPD_threadprivate:
7013   case OMPD_allocate:
7014   case OMPD_task:
7015   case OMPD_simd:
7016   case OMPD_tile:
7017   case OMPD_unroll:
7018   case OMPD_sections:
7019   case OMPD_section:
7020   case OMPD_single:
7021   case OMPD_master:
7022   case OMPD_critical:
7023   case OMPD_taskyield:
7024   case OMPD_barrier:
7025   case OMPD_taskwait:
7026   case OMPD_taskgroup:
7027   case OMPD_atomic:
7028   case OMPD_flush:
7029   case OMPD_depobj:
7030   case OMPD_scan:
7031   case OMPD_teams:
7032   case OMPD_target_data:
7033   case OMPD_target_exit_data:
7034   case OMPD_target_enter_data:
7035   case OMPD_distribute:
7036   case OMPD_distribute_simd:
7037   case OMPD_distribute_parallel_for:
7038   case OMPD_distribute_parallel_for_simd:
7039   case OMPD_teams_distribute:
7040   case OMPD_teams_distribute_simd:
7041   case OMPD_teams_distribute_parallel_for:
7042   case OMPD_teams_distribute_parallel_for_simd:
7043   case OMPD_target_update:
7044   case OMPD_declare_simd:
7045   case OMPD_declare_variant:
7046   case OMPD_begin_declare_variant:
7047   case OMPD_end_declare_variant:
7048   case OMPD_declare_target:
7049   case OMPD_end_declare_target:
7050   case OMPD_declare_reduction:
7051   case OMPD_declare_mapper:
7052   case OMPD_taskloop:
7053   case OMPD_taskloop_simd:
7054   case OMPD_master_taskloop:
7055   case OMPD_master_taskloop_simd:
7056   case OMPD_parallel_master_taskloop:
7057   case OMPD_parallel_master_taskloop_simd:
7058   case OMPD_requires:
7059   case OMPD_unknown:
7060     break;
7061   default:
7062     break;
7063   }
7064   llvm_unreachable("Unsupported directive kind.");
7065 }
7066 
/// Emits the number of threads for the target directive \p D as a 32-bit
/// integer value.
///
/// The value combines the thread_limit clause, the num_threads clause and the
/// 'if' clause, taken either from \p D itself (combined constructs) or from
/// directives nested in its region. Constants used by the code below: 1 for
/// simd-only forms and for an 'if' condition that is false, 0 when no clause
/// supplies a value.
///
/// \param CGF Code generation state used to emit the clause expressions.
/// \param D Target execution directive being lowered.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // ThreadLimitVal is still null here, so getNumThreads returns null only
    // when the single nested directive is neither parallel nor simd.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up a thread_limit clause of the nested directive, evaluated with
      // the captured-statement info of the target region installed.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations before its expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A teams directive that is not combined with distribute: continue with
      // the single directive nested inside it (Dir may become null).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A distribute directive without simd: look for a parallel region
      // nested in it.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Nothing more specific found: use the thread limit, or 0 if none.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The thread_limit clause, if any, sits on the combined directive.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Look one level deeper when the region holds a plain 'distribute'.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // The result of getNumThreads is returned as is; it is null when there is
    // no thread limit and the nested directive is neither parallel nor simd.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the first 'if' clause that has no name modifier or carries the
      // 'parallel' modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant false condition: exactly one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          // Condition only known at run time.
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present take the smaller value (unsigned
      // comparison); otherwise num_threads alone is the bound.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // Neither clause present: use 0.
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A false run-time 'if' condition yields one thread.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining kinds do not return a value here and fall through to the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7285 
7286 namespace {
7287 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7288 
7289 // Utility to handle information from clauses associated with a given
7290 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7291 // It provides a convenient interface to obtain the information and generate
7292 // code for that information.
7293 class MappableExprsHandler {
7294 public:
7295   /// Values for bit flags used to specify the mapping type for
7296   /// offloading.
7297   enum OpenMPOffloadMappingFlags : uint64_t {
7298     /// No flags
7299     OMP_MAP_NONE = 0x0,
7300     /// Allocate memory on the device and move data from host to device.
7301     OMP_MAP_TO = 0x01,
7302     /// Allocate memory on the device and move data from device to host.
7303     OMP_MAP_FROM = 0x02,
7304     /// Always perform the requested mapping action on the element, even
7305     /// if it was already mapped before.
7306     OMP_MAP_ALWAYS = 0x04,
7307     /// Delete the element from the device environment, ignoring the
7308     /// current reference count associated with the element.
7309     OMP_MAP_DELETE = 0x08,
7310     /// The element being mapped is a pointer-pointee pair; both the
7311     /// pointer and the pointee should be mapped.
7312     OMP_MAP_PTR_AND_OBJ = 0x10,
7313     /// This flags signals that the base address of an entry should be
7314     /// passed to the target kernel as an argument.
7315     OMP_MAP_TARGET_PARAM = 0x20,
7316     /// Signal that the runtime library has to return the device pointer
7317     /// in the current position for the data being mapped. Used when we have the
7318     /// use_device_ptr or use_device_addr clause.
7319     OMP_MAP_RETURN_PARAM = 0x40,
7320     /// This flag signals that the reference being passed is a pointer to
7321     /// private data.
7322     OMP_MAP_PRIVATE = 0x80,
7323     /// Pass the element to the device by value.
7324     OMP_MAP_LITERAL = 0x100,
7325     /// Implicit map
7326     OMP_MAP_IMPLICIT = 0x200,
7327     /// Close is a hint to the runtime to allocate memory close to
7328     /// the target device.
7329     OMP_MAP_CLOSE = 0x400,
7330     /// 0x800 is reserved for compatibility with XLC.
7331     /// Produce a runtime error if the data is not already allocated.
7332     OMP_MAP_PRESENT = 0x1000,
7333     // Increment and decrement a separate reference counter so that the data
7334     // cannot be unmapped within the associated region.  Thus, this flag is
7335     // intended to be used on 'target' and 'target data' directives because they
7336     // are inherently structured.  It is not intended to be used on 'target
7337     // enter data' and 'target exit data' directives because they are inherently
7338     // dynamic.
7339     // This is an OpenMP extension for the sake of OpenACC support.
7340     OMP_MAP_OMPX_HOLD = 0x2000,
7341     /// Signal that the runtime library should use args as an array of
7342     /// descriptor_dim pointers and use args_size as dims. Used when we have
7343     /// non-contiguous list items in target update directive
7344     OMP_MAP_NON_CONTIG = 0x100000000000,
7345     /// The 16 MSBs of the flags indicate whether the entry is member of some
7346     /// struct/class.
7347     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7348     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7349   };
7350 
7351   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7352   static unsigned getFlagMemberOffset() {
7353     unsigned Offset = 0;
7354     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7355          Remain = Remain >> 1)
7356       Offset++;
7357     return Offset;
7358   }
7359 
7360   /// Class that holds debugging information for a data mapping to be passed to
7361   /// the runtime library.
7362   class MappingExprInfo {
7363     /// The variable declaration used for the data mapping.
7364     const ValueDecl *MapDecl = nullptr;
7365     /// The original expression used in the map clause, or null if there is
7366     /// none.
7367     const Expr *MapExpr = nullptr;
7368 
7369   public:
7370     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7371         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7372 
7373     const ValueDecl *getMapDecl() const { return MapDecl; }
7374     const Expr *getMapExpr() const { return MapExpr; }
7375   };
7376 
7377   /// Class that associates information with a base pointer to be passed to the
7378   /// runtime library.
7379   class BasePointerInfo {
7380     /// The base pointer.
7381     llvm::Value *Ptr = nullptr;
7382     /// The base declaration that refers to this device pointer, or null if
7383     /// there is none.
7384     const ValueDecl *DevPtrDecl = nullptr;
7385 
7386   public:
7387     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7388         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7389     llvm::Value *operator*() const { return Ptr; }
7390     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7391     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7392   };
7393 
  // Container types for the per-entry arrays gathered for mappable clauses.
  // Each is a SmallVector with four inline elements.

  /// Debugging information (declaration and expression) entries.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  /// Base pointer entries, each optionally tied to a device pointer decl.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Plain lists of llvm::Value pointers.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Mapping flag entries.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// Mapper declaration entries.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  /// Lists of 64-bit unsigned dimension values.
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// Lists of value lists (a vector of MapValuesArrayTy).
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7401 
7402   /// This structure contains combined information generated for mappable
7403   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7404   /// mappers, and non-contiguous information.
7405   struct MapCombinedInfoTy {
7406     struct StructNonContiguousInfo {
7407       bool IsNonContiguous = false;
7408       MapDimArrayTy Dims;
7409       MapNonContiguousArrayTy Offsets;
7410       MapNonContiguousArrayTy Counts;
7411       MapNonContiguousArrayTy Strides;
7412     };
7413     MapExprsArrayTy Exprs;
7414     MapBaseValuesArrayTy BasePointers;
7415     MapValuesArrayTy Pointers;
7416     MapValuesArrayTy Sizes;
7417     MapFlagsArrayTy Types;
7418     MapMappersArrayTy Mappers;
7419     StructNonContiguousInfo NonContigInfo;
7420 
7421     /// Append arrays in \a CurInfo.
7422     void append(MapCombinedInfoTy &CurInfo) {
7423       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7424       BasePointers.append(CurInfo.BasePointers.begin(),
7425                           CurInfo.BasePointers.end());
7426       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7427       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7428       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7429       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7430       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7431                                  CurInfo.NonContigInfo.Dims.end());
7432       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7433                                     CurInfo.NonContigInfo.Offsets.end());
7434       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7435                                    CurInfo.NonContigInfo.Counts.end());
7436       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7437                                     CurInfo.NonContigInfo.Strides.end());
7438     }
7439   };
7440 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map information kept alongside the range.
    /// NOTE(review): presumably the entries collected for the struct before
    /// its combined entry is emitted - confirm against the users.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element as (field index, address); starts out as field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element as (field index, address); starts out as field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Starts out invalid.
    /// NOTE(review): presumably the base address of the struct - confirm.
    Address Base = Address::invalid();
    /// Starts out invalid.
    /// NOTE(review): presumably a lower-bound address - confirm.
    Address LB = Address::invalid();
    /// NOTE(review): presumably set when the struct is reached through an
    /// array section - confirm against the code that sets it.
    bool IsArraySection = false;
    /// NOTE(review): presumably set when the whole record is mapped - confirm
    /// against the code that sets it.
    bool HasCompleteRecord = false;
  };
7456 
7457 private:
7458   /// Kind that defines how a device pointer has to be returned.
7459   struct MapInfo {
7460     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7461     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7462     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7463     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7464     bool ReturnDevicePointer = false;
7465     bool IsImplicit = false;
7466     const ValueDecl *Mapper = nullptr;
7467     const Expr *VarRef = nullptr;
7468     bool ForDeviceAddr = false;
7469 
7470     MapInfo() = default;
7471     MapInfo(
7472         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7473         OpenMPMapClauseKind MapType,
7474         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7475         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7476         bool ReturnDevicePointer, bool IsImplicit,
7477         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7478         bool ForDeviceAddr = false)
7479         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7480           MotionModifiers(MotionModifiers),
7481           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7482           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7483   };
7484 
7485   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7486   /// member and there is no map information about it, then emission of that
7487   /// entry is deferred until the whole struct has been processed.
7488   struct DeferredDevicePtrEntryTy {
7489     const Expr *IE = nullptr;
7490     const ValueDecl *VD = nullptr;
7491     bool ForDeviceAddr = false;
7492 
7493     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7494                              bool ForDeviceAddr)
7495         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7496   };
7497 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// The bool value is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause that supplies their
  /// map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7521 
  /// Compute the size to be mapped for expression \a E and return it as an
  /// llvm::Value. Scalar sub-expressions (dimensions, length, lower bound) are
  /// emitted through CGF as needed.
  ///
  /// - Array shaping expression: size of the base's pointee type multiplied by
  ///   every dimension.
  /// - Array section: derived from the section's length and lower bound (see
  ///   the individual cases below).
  /// - Anything else: size of the expression's type, looking through
  ///   reference types.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Start from the pointee size and scale it by each dimension in turn.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Dimensions are converted to the size type before multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size: the pointee type for a pointer base, the element type
      // for an array base.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        // NOTE(review): cast<> is not expected to return null, so this assert
        // looks redundant - confirm before relying on it.
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = size of the base type - lb * element size, or 0 when the base
      // size is not greater than lb * element size.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // The subtraction is emitted unconditionally; the select below only
      // uses its result when the comparison holds.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    // Neither array shaping nor array section: plain size of the type.
    return CGF.getTypeSize(ExprTy);
  }
7596 
7597   /// Return the corresponding bits for a given map clause modifier. Add
7598   /// a flag marking the map as a pointer if requested. Add a flag marking the
7599   /// map as the first one of a series of maps that relate to the same map
7600   /// expression.
7601   OpenMPOffloadMappingFlags getMapTypeBits(
7602       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7603       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7604       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7605     OpenMPOffloadMappingFlags Bits =
7606         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7607     switch (MapType) {
7608     case OMPC_MAP_alloc:
7609     case OMPC_MAP_release:
7610       // alloc and release is the default behavior in the runtime library,  i.e.
7611       // if we don't pass any bits alloc/release that is what the runtime is
7612       // going to do. Therefore, we don't need to signal anything for these two
7613       // type modifiers.
7614       break;
7615     case OMPC_MAP_to:
7616       Bits |= OMP_MAP_TO;
7617       break;
7618     case OMPC_MAP_from:
7619       Bits |= OMP_MAP_FROM;
7620       break;
7621     case OMPC_MAP_tofrom:
7622       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7623       break;
7624     case OMPC_MAP_delete:
7625       Bits |= OMP_MAP_DELETE;
7626       break;
7627     case OMPC_MAP_unknown:
7628       llvm_unreachable("Unexpected map type!");
7629     }
7630     if (AddPtrFlag)
7631       Bits |= OMP_MAP_PTR_AND_OBJ;
7632     if (AddIsTargetParamFlag)
7633       Bits |= OMP_MAP_TARGET_PARAM;
7634     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7635       Bits |= OMP_MAP_ALWAYS;
7636     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7637       Bits |= OMP_MAP_CLOSE;
7638     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7639         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7640       Bits |= OMP_MAP_PRESENT;
7641     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7642       Bits |= OMP_MAP_OMPX_HOLD;
7643     if (IsNonContiguous)
7644       Bits |= OMP_MAP_NON_CONTIG;
7645     return Bits;
7646   }
7647 
7648   /// Return true if the provided expression is a final array section. A
7649   /// final array section, is one whose length can't be proved to be one.
7650   bool isFinalArraySectionExpression(const Expr *E) const {
7651     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7652 
7653     // It is not an array section and therefore not a unity-size one.
7654     if (!OASE)
7655       return false;
7656 
7657     // An array section with no colon always refer to a single element.
7658     if (OASE->getColonLocFirst().isInvalid())
7659       return false;
7660 
7661     const Expr *Length = OASE->getLength();
7662 
7663     // If we don't have a length we have to check if the array has size 1
7664     // for this dimension. Also, we should always expect a length if the
7665     // base type is pointer.
7666     if (!Length) {
7667       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7668                              OASE->getBase()->IgnoreParenImpCasts())
7669                              .getCanonicalType();
7670       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7671         return ATy->getSize().getSExtValue() != 1;
7672       // If we don't have a constant dimension length, we have to consider
7673       // the current section as having any size, so it is not necessarily
7674       // unitary. If it happen to be unity size, that's user fault.
7675       return true;
7676     }
7677 
7678     // Check if the length evaluates to 1.
7679     Expr::EvalResult Result;
7680     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7681       return true; // Can have more that size 1.
7682 
7683     llvm::APSInt ConstLength = Result.Val.getInt();
7684     return ConstLength.getSExtValue() != 1;
7685   }
7686 
7687   /// Generate the base pointers, section pointers, sizes, map type bits, and
7688   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7689   /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
7692   void generateInfoForComponentList(
7693       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7694       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7695       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7696       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7697       bool IsFirstComponentList, bool IsImplicit,
7698       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7699       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7700       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7701           OverlappedElements = llvm::None) const {
7702     // The following summarizes what has to be generated for each map and the
7703     // types below. The generated information is expressed in this order:
7704     // base pointer, section pointer, size, flags
7705     // (to add to the ones that come from the map type and modifier).
7706     //
7707     // double d;
7708     // int i[100];
7709     // float *p;
7710     //
7711     // struct S1 {
7712     //   int i;
7713     //   float f[50];
7714     // }
7715     // struct S2 {
7716     //   int i;
7717     //   float f[50];
7718     //   S1 s;
7719     //   double *p;
7720     //   struct S2 *ps;
7721     //   int &ref;
7722     // }
7723     // S2 s;
7724     // S2 *ps;
7725     //
7726     // map(d)
7727     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7728     //
7729     // map(i)
7730     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7731     //
7732     // map(i[1:23])
7733     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7734     //
7735     // map(p)
7736     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7737     //
7738     // map(p[1:24])
7739     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7740     // in unified shared memory mode or for local pointers
7741     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7742     //
7743     // map(s)
7744     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7745     //
7746     // map(s.i)
7747     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7748     //
7749     // map(s.s.f)
7750     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7751     //
7752     // map(s.p)
7753     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7754     //
7755     // map(to: s.p[:22])
7756     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7757     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7758     // &(s.p), &(s.p[0]), 22*sizeof(double),
7759     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7760     // (*) alloc space for struct members, only this is a target parameter
7761     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7762     //      optimizes this entry out, same in the examples below)
7763     // (***) map the pointee (map: to)
7764     //
7765     // map(to: s.ref)
7766     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7767     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7768     // (*) alloc space for struct members, only this is a target parameter
7769     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7770     //      optimizes this entry out, same in the examples below)
7771     // (***) map the pointee (map: to)
7772     //
7773     // map(s.ps)
7774     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7775     //
7776     // map(from: s.ps->s.i)
7777     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7778     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7779     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7780     //
7781     // map(to: s.ps->ps)
7782     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7783     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7784     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7785     //
7786     // map(s.ps->ps->ps)
7787     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7788     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7789     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7790     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7791     //
7792     // map(to: s.ps->ps->s.f[:22])
7793     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7794     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7795     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7796     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7797     //
7798     // map(ps)
7799     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7800     //
7801     // map(ps->i)
7802     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7803     //
7804     // map(ps->s.f)
7805     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7806     //
7807     // map(from: ps->p)
7808     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7809     //
7810     // map(to: ps->p[:22])
7811     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7812     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7813     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7814     //
7815     // map(ps->ps)
7816     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7817     //
7818     // map(from: ps->ps->s.i)
7819     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7820     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7821     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7822     //
7823     // map(from: ps->ps->ps)
7824     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7825     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7826     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7827     //
7828     // map(ps->ps->ps->ps)
7829     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7830     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7831     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7832     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7833     //
7834     // map(to: ps->ps->ps->s.f[:22])
7835     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7836     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7837     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7838     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7839     //
7840     // map(to: s.f[:22]) map(from: s.p[:33])
7841     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7843     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7844     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7845     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7846     // (*) allocate contiguous space needed to fit all mapped members even if
7847     //     we allocate space for members not mapped (in this example,
7848     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7849     //     them as well because they fall between &s.f[0] and &s.p)
7850     //
7851     // map(from: s.f[:22]) map(to: ps->p[:33])
7852     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7853     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7854     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7855     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7856     // (*) the struct this entry pertains to is the 2nd element in the list of
7857     //     arguments, hence MEMBER_OF(2)
7858     //
7859     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7860     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7861     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7862     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7863     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7864     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7865     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7866     // (*) the struct this entry pertains to is the 4th element in the list
7867     //     of arguments, hence MEMBER_OF(4)
7868 
7869     // Track if the map information being generated is the first for a capture.
7870     bool IsCaptureFirstInfo = IsFirstComponentList;
7871     // When the variable is on a declare target link or in a to clause with
7872     // unified memory, a reference is needed to hold the host/device address
7873     // of the variable.
7874     bool RequiresReference = false;
7875 
7876     // Scan the components from the base to the complete expression.
7877     auto CI = Components.rbegin();
7878     auto CE = Components.rend();
7879     auto I = CI;
7880 
7881     // Track if the map information being generated is the first for a list of
7882     // components.
7883     bool IsExpressionFirstInfo = true;
7884     bool FirstPointerInComplexData = false;
7885     Address BP = Address::invalid();
7886     const Expr *AssocExpr = I->getAssociatedExpression();
7887     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7888     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7889     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7890 
7891     if (isa<MemberExpr>(AssocExpr)) {
7892       // The base is the 'this' pointer. The content of the pointer is going
7893       // to be the base of the field being mapped.
7894       BP = CGF.LoadCXXThisAddress();
7895     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7896                (OASE &&
7897                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7898       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7899     } else if (OAShE &&
7900                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7901       BP = Address(
7902           CGF.EmitScalarExpr(OAShE->getBase()),
7903           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7904     } else {
7905       // The base is the reference to the variable.
7906       // BP = &Var.
7907       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7908       if (const auto *VD =
7909               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7910         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7911                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7912           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7913               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7914                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7915             RequiresReference = true;
7916             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7917           }
7918         }
7919       }
7920 
7921       // If the variable is a pointer and is being dereferenced (i.e. is not
7922       // the last component), the base has to be the pointer itself, not its
7923       // reference. References are ignored for mapping purposes.
7924       QualType Ty =
7925           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7926       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7927         // No need to generate individual map information for the pointer, it
7928         // can be associated with the combined storage if shared memory mode is
7929         // active or the base declaration is not global variable.
7930         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7931         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7932             !VD || VD->hasLocalStorage())
7933           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7934         else
7935           FirstPointerInComplexData = true;
7936         ++I;
7937       }
7938     }
7939 
7940     // Track whether a component of the list should be marked as MEMBER_OF some
7941     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7942     // in a component list should be marked as MEMBER_OF, all subsequent entries
7943     // do not belong to the base struct. E.g.
7944     // struct S2 s;
7945     // s.ps->ps->ps->f[:]
7946     //   (1) (2) (3) (4)
7947     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7948     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7949     // is the pointee of ps(2) which is not member of struct s, so it should not
7950     // be marked as such (it is still PTR_AND_OBJ).
7951     // The variable is initialized to false so that PTR_AND_OBJ entries which
7952     // are not struct members are not considered (e.g. array of pointers to
7953     // data).
7954     bool ShouldBeMemberOf = false;
7955 
7956     // Variable keeping track of whether or not we have encountered a component
7957     // in the component list which is a member expression. Useful when we have a
7958     // pointer or a final array section, in which case it is the previous
7959     // component in the list which tells us whether we have a member expression.
7960     // E.g. X.f[:]
7961     // While processing the final array section "[:]" it is "f" which tells us
7962     // whether we are dealing with a member of a declared struct.
7963     const MemberExpr *EncounteredME = nullptr;
7964 
    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
7967     uint64_t DimSize = 1;
7968 
7969     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7970     bool IsPrevMemberReference = false;
7971 
7972     for (; I != CE; ++I) {
7973       // If the current component is member of a struct (parent struct) mark it.
7974       if (!EncounteredME) {
7975         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7976         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7977         // as MEMBER_OF the parent struct.
7978         if (EncounteredME) {
7979           ShouldBeMemberOf = true;
7980           // Do not emit as complex pointer if this is actually not array-like
7981           // expression.
7982           if (FirstPointerInComplexData) {
7983             QualType Ty = std::prev(I)
7984                               ->getAssociatedDeclaration()
7985                               ->getType()
7986                               .getNonReferenceType();
7987             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7988             FirstPointerInComplexData = false;
7989           }
7990         }
7991       }
7992 
7993       auto Next = std::next(I);
7994 
7995       // We need to generate the addresses and sizes if this is the last
7996       // component, if the component is a pointer or if it is an array section
7997       // whose length can't be proved to be one. If this is a pointer, it
7998       // becomes the base address for the following components.
7999 
8000       // A final array section, is one whose length can't be proved to be one.
8001       // If the map item is non-contiguous then we don't treat any array section
8002       // as final array section.
8003       bool IsFinalArraySection =
8004           !IsNonContiguous &&
8005           isFinalArraySectionExpression(I->getAssociatedExpression());
8006 
8007       // If we have a declaration for the mapping use that, otherwise use
8008       // the base declaration of the map clause.
8009       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8010                                      ? I->getAssociatedDeclaration()
8011                                      : BaseDecl;
8012       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8013                                                : MapExpr;
8014 
8015       // Get information on whether the element is a pointer. Have to do a
8016       // special treatment for array sections given that they are built-in
8017       // types.
8018       const auto *OASE =
8019           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8020       const auto *OAShE =
8021           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8022       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8023       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8024       bool IsPointer =
8025           OAShE ||
8026           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8027                        .getCanonicalType()
8028                        ->isAnyPointerType()) ||
8029           I->getAssociatedExpression()->getType()->isAnyPointerType();
8030       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8031                                MapDecl &&
8032                                MapDecl->getType()->isLValueReferenceType();
8033       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8034 
8035       if (OASE)
8036         ++DimSize;
8037 
8038       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8039           IsFinalArraySection) {
8040         // If this is not the last component, we expect the pointer to be
8041         // associated with an array expression or member expression.
8042         assert((Next == CE ||
8043                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8044                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8045                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8046                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8047                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8048                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8049                "Unexpected expression");
8050 
8051         Address LB = Address::invalid();
8052         Address LowestElem = Address::invalid();
8053         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8054                                        const MemberExpr *E) {
8055           const Expr *BaseExpr = E->getBase();
8056           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8057           // scalar.
8058           LValue BaseLV;
8059           if (E->isArrow()) {
8060             LValueBaseInfo BaseInfo;
8061             TBAAAccessInfo TBAAInfo;
8062             Address Addr =
8063                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8064             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8065             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8066           } else {
8067             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8068           }
8069           return BaseLV;
8070         };
8071         if (OAShE) {
8072           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8073                                     CGF.getContext().getTypeAlignInChars(
8074                                         OAShE->getBase()->getType()));
8075         } else if (IsMemberReference) {
8076           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8077           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8078           LowestElem = CGF.EmitLValueForFieldInitialization(
8079                               BaseLVal, cast<FieldDecl>(MapDecl))
8080                            .getAddress(CGF);
8081           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8082                    .getAddress(CGF);
8083         } else {
8084           LowestElem = LB =
8085               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8086                   .getAddress(CGF);
8087         }
8088 
8089         // If this component is a pointer inside the base struct then we don't
8090         // need to create any entry for it - it will be combined with the object
8091         // it is pointing to into a single PTR_AND_OBJ entry.
8092         bool IsMemberPointerOrAddr =
8093             EncounteredME &&
8094             (((IsPointer || ForDeviceAddr) &&
8095               I->getAssociatedExpression() == EncounteredME) ||
8096              (IsPrevMemberReference && !IsPointer) ||
8097              (IsMemberReference && Next != CE &&
8098               !Next->getAssociatedExpression()->getType()->isPointerType()));
8099         if (!OverlappedElements.empty() && Next == CE) {
8100           // Handle base element with the info for overlapped elements.
8101           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8102           assert(!IsPointer &&
8103                  "Unexpected base element with the pointer type.");
8104           // Mark the whole struct as the struct that requires allocation on the
8105           // device.
8106           PartialStruct.LowestElem = {0, LowestElem};
8107           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8108               I->getAssociatedExpression()->getType());
8109           Address HB = CGF.Builder.CreateConstGEP(
8110               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8111                                                               CGF.VoidPtrTy),
8112               TypeSize.getQuantity() - 1);
8113           PartialStruct.HighestElem = {
8114               std::numeric_limits<decltype(
8115                   PartialStruct.HighestElem.first)>::max(),
8116               HB};
8117           PartialStruct.Base = BP;
8118           PartialStruct.LB = LB;
8119           assert(
8120               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8121               "Overlapped elements must be used only once for the variable.");
8122           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8123           // Emit data for non-overlapped data.
8124           OpenMPOffloadMappingFlags Flags =
8125               OMP_MAP_MEMBER_OF |
8126               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8127                              /*AddPtrFlag=*/false,
8128                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8129           llvm::Value *Size = nullptr;
8130           // Do bitcopy of all non-overlapped structure elements.
8131           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8132                    Component : OverlappedElements) {
8133             Address ComponentLB = Address::invalid();
8134             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8135                  Component) {
8136               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8137                 const auto *FD = dyn_cast<FieldDecl>(VD);
8138                 if (FD && FD->getType()->isLValueReferenceType()) {
8139                   const auto *ME =
8140                       cast<MemberExpr>(MC.getAssociatedExpression());
8141                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8142                   ComponentLB =
8143                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8144                           .getAddress(CGF);
8145                 } else {
8146                   ComponentLB =
8147                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8148                           .getAddress(CGF);
8149                 }
8150                 Size = CGF.Builder.CreatePtrDiff(
8151                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8152                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8153                 break;
8154               }
8155             }
8156             assert(Size && "Failed to determine structure size");
8157             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8158             CombinedInfo.BasePointers.push_back(BP.getPointer());
8159             CombinedInfo.Pointers.push_back(LB.getPointer());
8160             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8161                 Size, CGF.Int64Ty, /*isSigned=*/true));
8162             CombinedInfo.Types.push_back(Flags);
8163             CombinedInfo.Mappers.push_back(nullptr);
8164             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8165                                                                       : 1);
8166             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8167           }
8168           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8169           CombinedInfo.BasePointers.push_back(BP.getPointer());
8170           CombinedInfo.Pointers.push_back(LB.getPointer());
8171           Size = CGF.Builder.CreatePtrDiff(
8172               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8173               CGF.EmitCastToVoidPtr(LB.getPointer()));
8174           CombinedInfo.Sizes.push_back(
8175               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8176           CombinedInfo.Types.push_back(Flags);
8177           CombinedInfo.Mappers.push_back(nullptr);
8178           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8179                                                                     : 1);
8180           break;
8181         }
8182         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8183         if (!IsMemberPointerOrAddr ||
8184             (Next == CE && MapType != OMPC_MAP_unknown)) {
8185           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8186           CombinedInfo.BasePointers.push_back(BP.getPointer());
8187           CombinedInfo.Pointers.push_back(LB.getPointer());
8188           CombinedInfo.Sizes.push_back(
8189               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8190           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8191                                                                     : 1);
8192 
8193           // If Mapper is valid, the last component inherits the mapper.
8194           bool HasMapper = Mapper && Next == CE;
8195           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8196 
8197           // We need to add a pointer flag for each map that comes from the
8198           // same expression except for the first one. We also need to signal
8199           // this map is the first one that relates with the current capture
8200           // (there is a set of entries for each capture).
8201           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8202               MapType, MapModifiers, MotionModifiers, IsImplicit,
8203               !IsExpressionFirstInfo || RequiresReference ||
8204                   FirstPointerInComplexData || IsMemberReference,
8205               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8206 
8207           if (!IsExpressionFirstInfo || IsMemberReference) {
8208             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8209             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8210             if (IsPointer || (IsMemberReference && Next != CE))
8211               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8212                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8213 
8214             if (ShouldBeMemberOf) {
8215               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8216               // should be later updated with the correct value of MEMBER_OF.
8217               Flags |= OMP_MAP_MEMBER_OF;
8218               // From now on, all subsequent PTR_AND_OBJ entries should not be
8219               // marked as MEMBER_OF.
8220               ShouldBeMemberOf = false;
8221             }
8222           }
8223 
8224           CombinedInfo.Types.push_back(Flags);
8225         }
8226 
8227         // If we have encountered a member expression so far, keep track of the
8228         // mapped member. If the parent is "*this", then the value declaration
8229         // is nullptr.
8230         if (EncounteredME) {
8231           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8232           unsigned FieldIndex = FD->getFieldIndex();
8233 
8234           // Update info about the lowest and highest elements for this struct
8235           if (!PartialStruct.Base.isValid()) {
8236             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8237             if (IsFinalArraySection) {
8238               Address HB =
8239                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8240                       .getAddress(CGF);
8241               PartialStruct.HighestElem = {FieldIndex, HB};
8242             } else {
8243               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8244             }
8245             PartialStruct.Base = BP;
8246             PartialStruct.LB = BP;
8247           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8248             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8249           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8250             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8251           }
8252         }
8253 
8254         // Need to emit combined struct for array sections.
8255         if (IsFinalArraySection || IsNonContiguous)
8256           PartialStruct.IsArraySection = true;
8257 
8258         // If we have a final array section, we are done with this expression.
8259         if (IsFinalArraySection)
8260           break;
8261 
8262         // The pointer becomes the base for the next element.
8263         if (Next != CE)
8264           BP = IsMemberReference ? LowestElem : LB;
8265 
8266         IsExpressionFirstInfo = false;
8267         IsCaptureFirstInfo = false;
8268         FirstPointerInComplexData = false;
8269         IsPrevMemberReference = IsMemberReference;
8270       } else if (FirstPointerInComplexData) {
8271         QualType Ty = Components.rbegin()
8272                           ->getAssociatedDeclaration()
8273                           ->getType()
8274                           .getNonReferenceType();
8275         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8276         FirstPointerInComplexData = false;
8277       }
8278     }
8279     // If ran into the whole component - allocate the space for the whole
8280     // record.
8281     if (!EncounteredME)
8282       PartialStruct.HasCompleteRecord = true;
8283 
8284     if (!IsNonContiguous)
8285       return;
8286 
8287     const ASTContext &Context = CGF.getContext();
8288 
8289     // For supporting stride in array section, we need to initialize the first
8290     // dimension size as 1, first offset as 0, and first count as 1
8291     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8292     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8293     MapValuesArrayTy CurStrides;
8294     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8295     uint64_t ElementTypeSize;
8296 
8297     // Collect Size information for each dimension and get the element size as
8298     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8300     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8301          Components) {
8302       const Expr *AssocExpr = Component.getAssociatedExpression();
8303       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8304 
8305       if (!OASE)
8306         continue;
8307 
8308       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8309       auto *CAT = Context.getAsConstantArrayType(Ty);
8310       auto *VAT = Context.getAsVariableArrayType(Ty);
8311 
8312       // We need all the dimension size except for the last dimension.
8313       assert((VAT || CAT || &Component == &*Components.begin()) &&
8314              "Should be either ConstantArray or VariableArray if not the "
8315              "first Component");
8316 
8317       // Get element size if CurStrides is empty.
8318       if (CurStrides.empty()) {
8319         const Type *ElementType = nullptr;
8320         if (CAT)
8321           ElementType = CAT->getElementType().getTypePtr();
8322         else if (VAT)
8323           ElementType = VAT->getElementType().getTypePtr();
8324         else
8325           assert(&Component == &*Components.begin() &&
8326                  "Only expect pointer (non CAT or VAT) when this is the "
8327                  "first Component");
8328         // If ElementType is null, then it means the base is a pointer
8329         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8330         // for next iteration.
8331         if (ElementType) {
8332           // For the case that having pointer as base, we need to remove one
8333           // level of indirection.
8334           if (&Component != &*Components.begin())
8335             ElementType = ElementType->getPointeeOrArrayElementType();
8336           ElementTypeSize =
8337               Context.getTypeSizeInChars(ElementType).getQuantity();
8338           CurStrides.push_back(
8339               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8340         }
8341       }
8342       // Get dimension value except for the last dimension since we don't need
8343       // it.
8344       if (DimSizes.size() < Components.size() - 1) {
8345         if (CAT)
8346           DimSizes.push_back(llvm::ConstantInt::get(
8347               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8348         else if (VAT)
8349           DimSizes.push_back(CGF.Builder.CreateIntCast(
8350               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8351               /*IsSigned=*/false));
8352       }
8353     }
8354 
    // Skip the dummy dimension since we already have its information.
8356     auto DI = DimSizes.begin() + 1;
8357     // Product of dimension.
8358     llvm::Value *DimProd =
8359         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8360 
8361     // Collect info for non-contiguous. Notice that offset, count, and stride
8362     // are only meaningful for array-section, so we insert a null for anything
8363     // other than array-section.
8364     // Also, the size of offset, count, and stride are not the same as
8365     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8366     // count, and stride are the same as the number of non-contiguous
8367     // declaration in target update to/from clause.
8368     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8369          Components) {
8370       const Expr *AssocExpr = Component.getAssociatedExpression();
8371 
8372       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8373         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8374             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8375             /*isSigned=*/false);
8376         CurOffsets.push_back(Offset);
8377         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8378         CurStrides.push_back(CurStrides.back());
8379         continue;
8380       }
8381 
8382       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8383 
8384       if (!OASE)
8385         continue;
8386 
8387       // Offset
8388       const Expr *OffsetExpr = OASE->getLowerBound();
8389       llvm::Value *Offset = nullptr;
8390       if (!OffsetExpr) {
8391         // If offset is absent, then we just set it to zero.
8392         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8393       } else {
8394         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8395                                            CGF.Int64Ty,
8396                                            /*isSigned=*/false);
8397       }
8398       CurOffsets.push_back(Offset);
8399 
8400       // Count
8401       const Expr *CountExpr = OASE->getLength();
8402       llvm::Value *Count = nullptr;
8403       if (!CountExpr) {
8404         // In Clang, once a high dimension is an array section, we construct all
8405         // the lower dimension as array section, however, for case like
8406         // arr[0:2][2], Clang construct the inner dimension as an array section
8407         // but it actually is not in an array section form according to spec.
8408         if (!OASE->getColonLocFirst().isValid() &&
8409             !OASE->getColonLocSecond().isValid()) {
8410           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8411         } else {
8412           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8413           // When the length is absent it defaults to ⌈(size −
8414           // lower-bound)/stride⌉, where size is the size of the array
8415           // dimension.
8416           const Expr *StrideExpr = OASE->getStride();
8417           llvm::Value *Stride =
8418               StrideExpr
8419                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8420                                               CGF.Int64Ty, /*isSigned=*/false)
8421                   : nullptr;
8422           if (Stride)
8423             Count = CGF.Builder.CreateUDiv(
8424                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8425           else
8426             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8427         }
8428       } else {
8429         Count = CGF.EmitScalarExpr(CountExpr);
8430       }
8431       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8432       CurCounts.push_back(Count);
8433 
8434       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8435       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8436       //              Offset      Count     Stride
8437       //    D0          0           1         4    (int)    <- dummy dimension
8438       //    D1          0           2         8    (2 * (1) * 4)
8439       //    D2          1           2         20   (1 * (1 * 5) * 4)
8440       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8441       const Expr *StrideExpr = OASE->getStride();
8442       llvm::Value *Stride =
8443           StrideExpr
8444               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8445                                           CGF.Int64Ty, /*isSigned=*/false)
8446               : nullptr;
8447       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8448       if (Stride)
8449         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8450       else
8451         CurStrides.push_back(DimProd);
8452       if (DI != DimSizes.end())
8453         ++DI;
8454     }
8455 
8456     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8457     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8458     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8459   }
8460 
8461   /// Return the adjusted map modifiers if the declaration a capture refers to
8462   /// appears in a first-private clause. This is expected to be used only with
8463   /// directives that start with 'target'.
8464   MappableExprsHandler::OpenMPOffloadMappingFlags
8465   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8466     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8467 
8468     // A first private variable captured by reference will use only the
8469     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8470     // declaration is known as first-private in this handler.
8471     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8472       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8473         return MappableExprsHandler::OMP_MAP_TO |
8474                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8475       return MappableExprsHandler::OMP_MAP_PRIVATE |
8476              MappableExprsHandler::OMP_MAP_TO;
8477     }
8478     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8479     if (I != LambdasMap.end())
8480       // for map(to: lambda): using user specified map type.
8481       return getMapTypeBits(
8482           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8483           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8484           /*AddPtrFlag=*/false,
8485           /*AddIsTargetParamFlag=*/false,
8486           /*isNonContiguous=*/false);
8487     return MappableExprsHandler::OMP_MAP_TO |
8488            MappableExprsHandler::OMP_MAP_FROM;
8489   }
8490 
8491   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8492     // Rotate by getFlagMemberOffset() bits.
8493     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8494                                                   << getFlagMemberOffset());
8495   }
8496 
8497   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8498                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8499     // If the entry is PTR_AND_OBJ but has not been marked with the special
8500     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8501     // marked as MEMBER_OF.
8502     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8503         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8504       return;
8505 
8506     // Reset the placeholder value to prepare the flag for the assignment of the
8507     // proper MEMBER_OF value.
8508     Flags &= ~OMP_MAP_MEMBER_OF;
8509     Flags |= MemberOfFlag;
8510   }
8511 
8512   void getPlainLayout(const CXXRecordDecl *RD,
8513                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8514                       bool AsBase) const {
8515     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8516 
8517     llvm::StructType *St =
8518         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8519 
8520     unsigned NumElements = St->getNumElements();
8521     llvm::SmallVector<
8522         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8523         RecordLayout(NumElements);
8524 
8525     // Fill bases.
8526     for (const auto &I : RD->bases()) {
8527       if (I.isVirtual())
8528         continue;
8529       const auto *Base = I.getType()->getAsCXXRecordDecl();
8530       // Ignore empty bases.
8531       if (Base->isEmpty() || CGF.getContext()
8532                                  .getASTRecordLayout(Base)
8533                                  .getNonVirtualSize()
8534                                  .isZero())
8535         continue;
8536 
8537       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8538       RecordLayout[FieldIndex] = Base;
8539     }
8540     // Fill in virtual bases.
8541     for (const auto &I : RD->vbases()) {
8542       const auto *Base = I.getType()->getAsCXXRecordDecl();
8543       // Ignore empty bases.
8544       if (Base->isEmpty())
8545         continue;
8546       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8547       if (RecordLayout[FieldIndex])
8548         continue;
8549       RecordLayout[FieldIndex] = Base;
8550     }
8551     // Fill in all the fields.
8552     assert(!RD->isUnion() && "Unexpected union.");
8553     for (const auto *Field : RD->fields()) {
8554       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8555       // will fill in later.)
8556       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8557         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8558         RecordLayout[FieldIndex] = Field;
8559       }
8560     }
8561     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8562              &Data : RecordLayout) {
8563       if (Data.isNull())
8564         continue;
8565       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8566         getPlainLayout(Base, Layout, /*AsBase=*/true);
8567       else
8568         Layout.push_back(Data.get<const FieldDecl *>());
8569     }
8570   }
8571 
8572   /// Generate all the base pointers, section pointers, sizes, map types, and
8573   /// mappers for the extracted mappable expressions (all included in \a
8574   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8575   /// pair of the relevant declaration and index where it occurs is appended to
8576   /// the device pointers info array.
8577   void generateAllInfoForClauses(
8578       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8579       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8580           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8581     // We have to process the component lists that relate with the same
8582     // declaration in a single chunk so that we can generate the map flags
8583     // correctly. Therefore, we organize all lists in a map.
8584     enum MapKind { Present, Allocs, Other, Total };
8585     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8586                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8587         Info;
8588 
8589     // Helper function to fill the information map for the different supported
8590     // clauses.
8591     auto &&InfoGen =
8592         [&Info, &SkipVarSet](
8593             const ValueDecl *D, MapKind Kind,
8594             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8595             OpenMPMapClauseKind MapType,
8596             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8597             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8598             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8599             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8600           if (SkipVarSet.contains(D))
8601             return;
8602           auto It = Info.find(D);
8603           if (It == Info.end())
8604             It = Info
8605                      .insert(std::make_pair(
8606                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8607                      .first;
8608           It->second[Kind].emplace_back(
8609               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8610               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8611         };
8612 
8613     for (const auto *Cl : Clauses) {
8614       const auto *C = dyn_cast<OMPMapClause>(Cl);
8615       if (!C)
8616         continue;
8617       MapKind Kind = Other;
8618       if (llvm::is_contained(C->getMapTypeModifiers(),
8619                              OMPC_MAP_MODIFIER_present))
8620         Kind = Present;
8621       else if (C->getMapType() == OMPC_MAP_alloc)
8622         Kind = Allocs;
8623       const auto *EI = C->getVarRefs().begin();
8624       for (const auto L : C->component_lists()) {
8625         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8626         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8627                 C->getMapTypeModifiers(), llvm::None,
8628                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8629                 E);
8630         ++EI;
8631       }
8632     }
8633     for (const auto *Cl : Clauses) {
8634       const auto *C = dyn_cast<OMPToClause>(Cl);
8635       if (!C)
8636         continue;
8637       MapKind Kind = Other;
8638       if (llvm::is_contained(C->getMotionModifiers(),
8639                              OMPC_MOTION_MODIFIER_present))
8640         Kind = Present;
8641       const auto *EI = C->getVarRefs().begin();
8642       for (const auto L : C->component_lists()) {
8643         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8644                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8645                 C->isImplicit(), std::get<2>(L), *EI);
8646         ++EI;
8647       }
8648     }
8649     for (const auto *Cl : Clauses) {
8650       const auto *C = dyn_cast<OMPFromClause>(Cl);
8651       if (!C)
8652         continue;
8653       MapKind Kind = Other;
8654       if (llvm::is_contained(C->getMotionModifiers(),
8655                              OMPC_MOTION_MODIFIER_present))
8656         Kind = Present;
8657       const auto *EI = C->getVarRefs().begin();
8658       for (const auto L : C->component_lists()) {
8659         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8660                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8661                 C->isImplicit(), std::get<2>(L), *EI);
8662         ++EI;
8663       }
8664     }
8665 
8666     // Look at the use_device_ptr clause information and mark the existing map
8667     // entries as such. If there is no map information for an entry in the
8668     // use_device_ptr list, we create one with map type 'alloc' and zero size
8669     // section. It is the user fault if that was not mapped before. If there is
8670     // no map information and the pointer is a struct member, then we defer the
8671     // emission of that entry until the whole struct has been processed.
8672     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8673                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8674         DeferredInfo;
8675     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8676 
8677     for (const auto *Cl : Clauses) {
8678       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8679       if (!C)
8680         continue;
8681       for (const auto L : C->component_lists()) {
8682         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8683             std::get<1>(L);
8684         assert(!Components.empty() &&
8685                "Not expecting empty list of components!");
8686         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8687         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8688         const Expr *IE = Components.back().getAssociatedExpression();
8689         // If the first component is a member expression, we have to look into
8690         // 'this', which maps to null in the map of map information. Otherwise
8691         // look directly for the information.
8692         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8693 
8694         // We potentially have map information for this declaration already.
8695         // Look for the first set of components that refer to it.
8696         if (It != Info.end()) {
8697           bool Found = false;
8698           for (auto &Data : It->second) {
8699             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8700               return MI.Components.back().getAssociatedDeclaration() == VD;
8701             });
8702             // If we found a map entry, signal that the pointer has to be
8703             // returned and move on to the next declaration. Exclude cases where
8704             // the base pointer is mapped as array subscript, array section or
8705             // array shaping. The base address is passed as a pointer to base in
8706             // this case and cannot be used as a base for use_device_ptr list
8707             // item.
8708             if (CI != Data.end()) {
8709               auto PrevCI = std::next(CI->Components.rbegin());
8710               const auto *VarD = dyn_cast<VarDecl>(VD);
8711               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8712                   isa<MemberExpr>(IE) ||
8713                   !VD->getType().getNonReferenceType()->isPointerType() ||
8714                   PrevCI == CI->Components.rend() ||
8715                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8716                   VarD->hasLocalStorage()) {
8717                 CI->ReturnDevicePointer = true;
8718                 Found = true;
8719                 break;
8720               }
8721             }
8722           }
8723           if (Found)
8724             continue;
8725         }
8726 
8727         // We didn't find any match in our map information - generate a zero
8728         // size array section - if the pointer is a struct member we defer this
8729         // action until the whole struct has been processed.
8730         if (isa<MemberExpr>(IE)) {
8731           // Insert the pointer into Info to be processed by
8732           // generateInfoForComponentList. Because it is a member pointer
8733           // without a pointee, no entry will be generated for it, therefore
8734           // we need to generate one after the whole struct has been processed.
8735           // Nonetheless, generateInfoForComponentList must be called to take
8736           // the pointer into account for the calculation of the range of the
8737           // partial struct.
8738           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8739                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8740                   nullptr);
8741           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8742         } else {
8743           llvm::Value *Ptr =
8744               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8745           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8746           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8747           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8748           UseDevicePtrCombinedInfo.Sizes.push_back(
8749               llvm::Constant::getNullValue(CGF.Int64Ty));
8750           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8751           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8752         }
8753       }
8754     }
8755 
8756     // Look at the use_device_addr clause information and mark the existing map
8757     // entries as such. If there is no map information for an entry in the
8758     // use_device_addr list, we create one with map type 'alloc' and zero size
8759     // section. It is the user fault if that was not mapped before. If there is
8760     // no map information and the pointer is a struct member, then we defer the
8761     // emission of that entry until the whole struct has been processed.
8762     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8763     for (const auto *Cl : Clauses) {
8764       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8765       if (!C)
8766         continue;
8767       for (const auto L : C->component_lists()) {
8768         assert(!std::get<1>(L).empty() &&
8769                "Not expecting empty list of components!");
8770         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8771         if (!Processed.insert(VD).second)
8772           continue;
8773         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8774         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8775         // If the first component is a member expression, we have to look into
8776         // 'this', which maps to null in the map of map information. Otherwise
8777         // look directly for the information.
8778         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8779 
8780         // We potentially have map information for this declaration already.
8781         // Look for the first set of components that refer to it.
8782         if (It != Info.end()) {
8783           bool Found = false;
8784           for (auto &Data : It->second) {
8785             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8786               return MI.Components.back().getAssociatedDeclaration() == VD;
8787             });
8788             // If we found a map entry, signal that the pointer has to be
8789             // returned and move on to the next declaration.
8790             if (CI != Data.end()) {
8791               CI->ReturnDevicePointer = true;
8792               Found = true;
8793               break;
8794             }
8795           }
8796           if (Found)
8797             continue;
8798         }
8799 
8800         // We didn't find any match in our map information - generate a zero
8801         // size array section - if the pointer is a struct member we defer this
8802         // action until the whole struct has been processed.
8803         if (isa<MemberExpr>(IE)) {
8804           // Insert the pointer into Info to be processed by
8805           // generateInfoForComponentList. Because it is a member pointer
8806           // without a pointee, no entry will be generated for it, therefore
8807           // we need to generate one after the whole struct has been processed.
8808           // Nonetheless, generateInfoForComponentList must be called to take
8809           // the pointer into account for the calculation of the range of the
8810           // partial struct.
8811           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8812                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8813                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8814           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8815         } else {
8816           llvm::Value *Ptr;
8817           if (IE->isGLValue())
8818             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8819           else
8820             Ptr = CGF.EmitScalarExpr(IE);
8821           CombinedInfo.Exprs.push_back(VD);
8822           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8823           CombinedInfo.Pointers.push_back(Ptr);
8824           CombinedInfo.Sizes.push_back(
8825               llvm::Constant::getNullValue(CGF.Int64Ty));
8826           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8827           CombinedInfo.Mappers.push_back(nullptr);
8828         }
8829       }
8830     }
8831 
8832     for (const auto &Data : Info) {
8833       StructRangeInfoTy PartialStruct;
8834       // Temporary generated information.
8835       MapCombinedInfoTy CurInfo;
8836       const Decl *D = Data.first;
8837       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8838       for (const auto &M : Data.second) {
8839         for (const MapInfo &L : M) {
8840           assert(!L.Components.empty() &&
8841                  "Not expecting declaration with no component lists.");
8842 
8843           // Remember the current base pointer index.
8844           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8845           CurInfo.NonContigInfo.IsNonContiguous =
8846               L.Components.back().isNonContiguous();
8847           generateInfoForComponentList(
8848               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8849               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8850               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8851 
8852           // If this entry relates with a device pointer, set the relevant
8853           // declaration and add the 'return pointer' flag.
8854           if (L.ReturnDevicePointer) {
8855             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8856                    "Unexpected number of mapped base pointers.");
8857 
8858             const ValueDecl *RelevantVD =
8859                 L.Components.back().getAssociatedDeclaration();
8860             assert(RelevantVD &&
8861                    "No relevant declaration related with device pointer??");
8862 
8863             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8864                 RelevantVD);
8865             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8866           }
8867         }
8868       }
8869 
8870       // Append any pending zero-length pointers which are struct members and
8871       // used with use_device_ptr or use_device_addr.
8872       auto CI = DeferredInfo.find(Data.first);
8873       if (CI != DeferredInfo.end()) {
8874         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8875           llvm::Value *BasePtr;
8876           llvm::Value *Ptr;
8877           if (L.ForDeviceAddr) {
8878             if (L.IE->isGLValue())
8879               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8880             else
8881               Ptr = this->CGF.EmitScalarExpr(L.IE);
8882             BasePtr = Ptr;
8883             // Entry is RETURN_PARAM. Also, set the placeholder value
8884             // MEMBER_OF=FFFF so that the entry is later updated with the
8885             // correct value of MEMBER_OF.
8886             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8887           } else {
8888             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8889             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8890                                              L.IE->getExprLoc());
8891             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8892             // placeholder value MEMBER_OF=FFFF so that the entry is later
8893             // updated with the correct value of MEMBER_OF.
8894             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8895                                     OMP_MAP_MEMBER_OF);
8896           }
8897           CurInfo.Exprs.push_back(L.VD);
8898           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8899           CurInfo.Pointers.push_back(Ptr);
8900           CurInfo.Sizes.push_back(
8901               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8902           CurInfo.Mappers.push_back(nullptr);
8903         }
8904       }
8905       // If there is an entry in PartialStruct it means we have a struct with
8906       // individual members mapped. Emit an extra combined entry.
8907       if (PartialStruct.Base.isValid()) {
8908         CurInfo.NonContigInfo.Dims.push_back(0);
8909         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8910       }
8911 
8912       // We need to append the results of this capture to what we already
8913       // have.
8914       CombinedInfo.append(CurInfo);
8915     }
8916     // Append data for use_device_ptr clauses.
8917     CombinedInfo.append(UseDevicePtrCombinedInfo);
8918   }
8919 
8920 public:
8921   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8922       : CurDir(&Dir), CGF(CGF) {
8923     // Extract firstprivate clause information.
8924     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8925       for (const auto *D : C->varlists())
8926         FirstPrivateDecls.try_emplace(
8927             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8928     // Extract implicit firstprivates from uses_allocators clauses.
8929     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8930       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8931         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8932         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8933           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8934                                         /*Implicit=*/true);
8935         else if (const auto *VD = dyn_cast<VarDecl>(
8936                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8937                          ->getDecl()))
8938           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8939       }
8940     }
8941     // Extract device pointer clause information.
8942     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8943       for (auto L : C->component_lists())
8944         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8945     // Extract map information.
8946     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8947       if (C->getMapType() != OMPC_MAP_to)
8948         continue;
8949       for (auto L : C->component_lists()) {
8950         const ValueDecl *VD = std::get<0>(L);
8951         const auto *RD = VD ? VD->getType()
8952                                   .getCanonicalType()
8953                                   .getNonReferenceType()
8954                                   ->getAsCXXRecordDecl()
8955                             : nullptr;
8956         if (RD && RD->isLambda())
8957           LambdasMap.try_emplace(std::get<0>(L), C);
8958       }
8959     }
8960   }
8961 
  /// Constructor for the declare mapper directive. It only stores \p Dir and
  /// \p CGF; unlike the executable-directive constructor it gathers no clause
  /// information up front.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8965 
8966   /// Generate code for the combined entry if we have a partially mapped struct
8967   /// and take care of the mapping flags of the arguments corresponding to
8968   /// individual struct members.
8969   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8970                          MapFlagsArrayTy &CurTypes,
8971                          const StructRangeInfoTy &PartialStruct,
8972                          const ValueDecl *VD = nullptr,
8973                          bool NotTargetParams = true) const {
8974     if (CurTypes.size() == 1 &&
8975         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8976         !PartialStruct.IsArraySection)
8977       return;
8978     Address LBAddr = PartialStruct.LowestElem.second;
8979     Address HBAddr = PartialStruct.HighestElem.second;
8980     if (PartialStruct.HasCompleteRecord) {
8981       LBAddr = PartialStruct.LB;
8982       HBAddr = PartialStruct.LB;
8983     }
8984     CombinedInfo.Exprs.push_back(VD);
8985     // Base is the base of the struct
8986     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8987     // Pointer is the address of the lowest element
8988     llvm::Value *LB = LBAddr.getPointer();
8989     CombinedInfo.Pointers.push_back(LB);
8990     // There should not be a mapper for a combined entry.
8991     CombinedInfo.Mappers.push_back(nullptr);
8992     // Size is (addr of {highest+1} element) - (addr of lowest element)
8993     llvm::Value *HB = HBAddr.getPointer();
8994     llvm::Value *HAddr =
8995         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8996     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8997     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8998     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8999     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9000                                                   /*isSigned=*/false);
9001     CombinedInfo.Sizes.push_back(Size);
9002     // Map type is always TARGET_PARAM, if generate info for captures.
9003     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
9004                                                  : OMP_MAP_TARGET_PARAM);
9005     // If any element has the present modifier, then make sure the runtime
9006     // doesn't attempt to allocate the struct.
9007     if (CurTypes.end() !=
9008         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9009           return Type & OMP_MAP_PRESENT;
9010         }))
9011       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
9012     // Remove TARGET_PARAM flag from the first element
9013     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
9014     // If any element has the ompx_hold modifier, then make sure the runtime
9015     // uses the hold reference count for the struct as a whole so that it won't
9016     // be unmapped by an extra dynamic reference count decrement.  Add it to all
9017     // elements as well so the runtime knows which reference count to check
9018     // when determining whether it's time for device-to-host transfers of
9019     // individual elements.
9020     if (CurTypes.end() !=
9021         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9022           return Type & OMP_MAP_OMPX_HOLD;
9023         })) {
9024       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
9025       for (auto &M : CurTypes)
9026         M |= OMP_MAP_OMPX_HOLD;
9027     }
9028 
9029     // All other current entries will be MEMBER_OF the combined entry
9030     // (except for PTR_AND_OBJ entries which do not have a placeholder value
9031     // 0xFFFF in the MEMBER_OF field).
9032     OpenMPOffloadMappingFlags MemberOfFlag =
9033         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9034     for (auto &M : CurTypes)
9035       setCorrectMemberOfFlag(M, MemberOfFlag);
9036   }
9037 
9038   /// Generate all the base pointers, section pointers, sizes, map types, and
9039   /// mappers for the extracted mappable expressions (all included in \a
9040   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9041   /// pair of the relevant declaration and index where it occurs is appended to
9042   /// the device pointers info array.
9043   void generateAllInfo(
9044       MapCombinedInfoTy &CombinedInfo,
9045       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9046           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9047     assert(CurDir.is<const OMPExecutableDirective *>() &&
9048            "Expect a executable directive");
9049     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9050     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9051   }
9052 
9053   /// Generate all the base pointers, section pointers, sizes, map types, and
9054   /// mappers for the extracted map clauses of user-defined mapper (all included
9055   /// in \a CombinedInfo).
9056   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9057     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9058            "Expect a declare mapper directive");
9059     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9060     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9061   }
9062 
9063   /// Emit capture info for lambdas for variables captured by reference.
9064   void generateInfoForLambdaCaptures(
9065       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9066       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9067     const auto *RD = VD->getType()
9068                          .getCanonicalType()
9069                          .getNonReferenceType()
9070                          ->getAsCXXRecordDecl();
9071     if (!RD || !RD->isLambda())
9072       return;
9073     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
9074     LValue VDLVal = CGF.MakeAddrLValue(
9075         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9076     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9077     FieldDecl *ThisCapture = nullptr;
9078     RD->getCaptureFields(Captures, ThisCapture);
9079     if (ThisCapture) {
9080       LValue ThisLVal =
9081           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9082       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9083       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9084                                  VDLVal.getPointer(CGF));
9085       CombinedInfo.Exprs.push_back(VD);
9086       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9087       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9088       CombinedInfo.Sizes.push_back(
9089           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9090                                     CGF.Int64Ty, /*isSigned=*/true));
9091       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9092                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9093       CombinedInfo.Mappers.push_back(nullptr);
9094     }
9095     for (const LambdaCapture &LC : RD->captures()) {
9096       if (!LC.capturesVariable())
9097         continue;
9098       const VarDecl *VD = LC.getCapturedVar();
9099       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9100         continue;
9101       auto It = Captures.find(VD);
9102       assert(It != Captures.end() && "Found lambda capture without field.");
9103       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9104       if (LC.getCaptureKind() == LCK_ByRef) {
9105         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9106         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9107                                    VDLVal.getPointer(CGF));
9108         CombinedInfo.Exprs.push_back(VD);
9109         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9110         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9111         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9112             CGF.getTypeSize(
9113                 VD->getType().getCanonicalType().getNonReferenceType()),
9114             CGF.Int64Ty, /*isSigned=*/true));
9115       } else {
9116         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9117         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9118                                    VDLVal.getPointer(CGF));
9119         CombinedInfo.Exprs.push_back(VD);
9120         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9121         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9122         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9123       }
9124       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9125                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9126       CombinedInfo.Mappers.push_back(nullptr);
9127     }
9128   }
9129 
9130   /// Set correct indices for lambdas captures.
9131   void adjustMemberOfForLambdaCaptures(
9132       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9133       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9134       MapFlagsArrayTy &Types) const {
9135     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9136       // Set correct member_of idx for all implicit lambda captures.
9137       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9138                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9139         continue;
9140       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9141       assert(BasePtr && "Unable to find base lambda address.");
9142       int TgtIdx = -1;
9143       for (unsigned J = I; J > 0; --J) {
9144         unsigned Idx = J - 1;
9145         if (Pointers[Idx] != BasePtr)
9146           continue;
9147         TgtIdx = Idx;
9148         break;
9149       }
9150       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9151       // All other current entries will be MEMBER_OF the combined entry
9152       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9153       // 0xFFFF in the MEMBER_OF field).
9154       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9155       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9156     }
9157   }
9158 
9159   /// Generate the base pointers, section pointers, sizes, map types, and
9160   /// mappers associated to a given capture (all included in \a CombinedInfo).
9161   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9162                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9163                               StructRangeInfoTy &PartialStruct) const {
9164     assert(!Cap->capturesVariableArrayType() &&
9165            "Not expecting to generate map info for a variable array type!");
9166 
9167     // We need to know when we generating information for the first component
9168     const ValueDecl *VD = Cap->capturesThis()
9169                               ? nullptr
9170                               : Cap->getCapturedVar()->getCanonicalDecl();
9171 
9172     // for map(to: lambda): skip here, processing it in
9173     // generateDefaultMapInfo
9174     if (LambdasMap.count(VD))
9175       return;
9176 
9177     // If this declaration appears in a is_device_ptr clause we just have to
9178     // pass the pointer by value. If it is a reference to a declaration, we just
9179     // pass its value.
9180     if (DevPointersMap.count(VD)) {
9181       CombinedInfo.Exprs.push_back(VD);
9182       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9183       CombinedInfo.Pointers.push_back(Arg);
9184       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9185           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9186           /*isSigned=*/true));
9187       CombinedInfo.Types.push_back(
9188           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9189           OMP_MAP_TARGET_PARAM);
9190       CombinedInfo.Mappers.push_back(nullptr);
9191       return;
9192     }
9193 
9194     using MapData =
9195         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9196                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9197                    const ValueDecl *, const Expr *>;
9198     SmallVector<MapData, 4> DeclComponentLists;
9199     assert(CurDir.is<const OMPExecutableDirective *>() &&
9200            "Expect a executable directive");
9201     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9202     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9203       const auto *EI = C->getVarRefs().begin();
9204       for (const auto L : C->decl_component_lists(VD)) {
9205         const ValueDecl *VDecl, *Mapper;
9206         // The Expression is not correct if the mapping is implicit
9207         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9208         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9209         std::tie(VDecl, Components, Mapper) = L;
9210         assert(VDecl == VD && "We got information for the wrong declaration??");
9211         assert(!Components.empty() &&
9212                "Not expecting declaration with no component lists.");
9213         DeclComponentLists.emplace_back(Components, C->getMapType(),
9214                                         C->getMapTypeModifiers(),
9215                                         C->isImplicit(), Mapper, E);
9216         ++EI;
9217       }
9218     }
9219     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9220                                              const MapData &RHS) {
9221       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9222       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9223       bool HasPresent =
9224           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9225       bool HasAllocs = MapType == OMPC_MAP_alloc;
9226       MapModifiers = std::get<2>(RHS);
9227       MapType = std::get<1>(LHS);
9228       bool HasPresentR =
9229           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9230       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9231       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9232     });
9233 
9234     // Find overlapping elements (including the offset from the base element).
9235     llvm::SmallDenseMap<
9236         const MapData *,
9237         llvm::SmallVector<
9238             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9239         4>
9240         OverlappedData;
9241     size_t Count = 0;
9242     for (const MapData &L : DeclComponentLists) {
9243       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9244       OpenMPMapClauseKind MapType;
9245       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9246       bool IsImplicit;
9247       const ValueDecl *Mapper;
9248       const Expr *VarRef;
9249       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9250           L;
9251       ++Count;
9252       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9253         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9254         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9255                  VarRef) = L1;
9256         auto CI = Components.rbegin();
9257         auto CE = Components.rend();
9258         auto SI = Components1.rbegin();
9259         auto SE = Components1.rend();
9260         for (; CI != CE && SI != SE; ++CI, ++SI) {
9261           if (CI->getAssociatedExpression()->getStmtClass() !=
9262               SI->getAssociatedExpression()->getStmtClass())
9263             break;
9264           // Are we dealing with different variables/fields?
9265           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9266             break;
9267         }
9268         // Found overlapping if, at least for one component, reached the head
9269         // of the components list.
9270         if (CI == CE || SI == SE) {
9271           // Ignore it if it is the same component.
9272           if (CI == CE && SI == SE)
9273             continue;
9274           const auto It = (SI == SE) ? CI : SI;
9275           // If one component is a pointer and another one is a kind of
9276           // dereference of this pointer (array subscript, section, dereference,
9277           // etc.), it is not an overlapping.
9278           // Same, if one component is a base and another component is a
9279           // dereferenced pointer memberexpr with the same base.
9280           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9281               (std::prev(It)->getAssociatedDeclaration() &&
9282                std::prev(It)
9283                    ->getAssociatedDeclaration()
9284                    ->getType()
9285                    ->isPointerType()) ||
9286               (It->getAssociatedDeclaration() &&
9287                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9288                std::next(It) != CE && std::next(It) != SE))
9289             continue;
9290           const MapData &BaseData = CI == CE ? L : L1;
9291           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9292               SI == SE ? Components : Components1;
9293           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9294           OverlappedElements.getSecond().push_back(SubData);
9295         }
9296       }
9297     }
9298     // Sort the overlapped elements for each item.
9299     llvm::SmallVector<const FieldDecl *, 4> Layout;
9300     if (!OverlappedData.empty()) {
9301       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9302       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9303       while (BaseType != OrigType) {
9304         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9305         OrigType = BaseType->getPointeeOrArrayElementType();
9306       }
9307 
9308       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9309         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9310       else {
9311         const auto *RD = BaseType->getAsRecordDecl();
9312         Layout.append(RD->field_begin(), RD->field_end());
9313       }
9314     }
9315     for (auto &Pair : OverlappedData) {
9316       llvm::stable_sort(
9317           Pair.getSecond(),
9318           [&Layout](
9319               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9320               OMPClauseMappableExprCommon::MappableExprComponentListRef
9321                   Second) {
9322             auto CI = First.rbegin();
9323             auto CE = First.rend();
9324             auto SI = Second.rbegin();
9325             auto SE = Second.rend();
9326             for (; CI != CE && SI != SE; ++CI, ++SI) {
9327               if (CI->getAssociatedExpression()->getStmtClass() !=
9328                   SI->getAssociatedExpression()->getStmtClass())
9329                 break;
9330               // Are we dealing with different variables/fields?
9331               if (CI->getAssociatedDeclaration() !=
9332                   SI->getAssociatedDeclaration())
9333                 break;
9334             }
9335 
9336             // Lists contain the same elements.
9337             if (CI == CE && SI == SE)
9338               return false;
9339 
9340             // List with less elements is less than list with more elements.
9341             if (CI == CE || SI == SE)
9342               return CI == CE;
9343 
9344             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9345             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9346             if (FD1->getParent() == FD2->getParent())
9347               return FD1->getFieldIndex() < FD2->getFieldIndex();
9348             const auto *It =
9349                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9350                   return FD == FD1 || FD == FD2;
9351                 });
9352             return *It == FD1;
9353           });
9354     }
9355 
9356     // Associated with a capture, because the mapping flags depend on it.
9357     // Go through all of the elements with the overlapped elements.
9358     bool IsFirstComponentList = true;
9359     for (const auto &Pair : OverlappedData) {
9360       const MapData &L = *Pair.getFirst();
9361       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9362       OpenMPMapClauseKind MapType;
9363       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9364       bool IsImplicit;
9365       const ValueDecl *Mapper;
9366       const Expr *VarRef;
9367       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9368           L;
9369       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9370           OverlappedComponents = Pair.getSecond();
9371       generateInfoForComponentList(
9372           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9373           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9374           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9375       IsFirstComponentList = false;
9376     }
9377     // Go through other elements without overlapped elements.
9378     for (const MapData &L : DeclComponentLists) {
9379       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9380       OpenMPMapClauseKind MapType;
9381       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9382       bool IsImplicit;
9383       const ValueDecl *Mapper;
9384       const Expr *VarRef;
9385       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9386           L;
9387       auto It = OverlappedData.find(&L);
9388       if (It == OverlappedData.end())
9389         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9390                                      Components, CombinedInfo, PartialStruct,
9391                                      IsFirstComponentList, IsImplicit, Mapper,
9392                                      /*ForDeviceAddr=*/false, VD, VarRef);
9393       IsFirstComponentList = false;
9394     }
9395   }
9396 
9397   /// Generate the default map information for a given capture \a CI,
9398   /// record field declaration \a RI and captured value \a CV.
9399   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9400                               const FieldDecl &RI, llvm::Value *CV,
9401                               MapCombinedInfoTy &CombinedInfo) const {
9402     bool IsImplicit = true;
9403     // Do the default mapping.
9404     if (CI.capturesThis()) {
9405       CombinedInfo.Exprs.push_back(nullptr);
9406       CombinedInfo.BasePointers.push_back(CV);
9407       CombinedInfo.Pointers.push_back(CV);
9408       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9409       CombinedInfo.Sizes.push_back(
9410           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9411                                     CGF.Int64Ty, /*isSigned=*/true));
9412       // Default map type.
9413       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9414     } else if (CI.capturesVariableByCopy()) {
9415       const VarDecl *VD = CI.getCapturedVar();
9416       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9417       CombinedInfo.BasePointers.push_back(CV);
9418       CombinedInfo.Pointers.push_back(CV);
9419       if (!RI.getType()->isAnyPointerType()) {
9420         // We have to signal to the runtime captures passed by value that are
9421         // not pointers.
9422         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9423         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9424             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9425       } else {
9426         // Pointers are implicitly mapped with a zero size and no flags
9427         // (other than first map that is added for all implicit maps).
9428         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9429         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9430       }
9431       auto I = FirstPrivateDecls.find(VD);
9432       if (I != FirstPrivateDecls.end())
9433         IsImplicit = I->getSecond();
9434     } else {
9435       assert(CI.capturesVariable() && "Expected captured reference.");
9436       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9437       QualType ElementType = PtrTy->getPointeeType();
9438       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9439           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9440       // The default map type for a scalar/complex type is 'to' because by
9441       // default the value doesn't have to be retrieved. For an aggregate
9442       // type, the default is 'tofrom'.
9443       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9444       const VarDecl *VD = CI.getCapturedVar();
9445       auto I = FirstPrivateDecls.find(VD);
9446       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9447       CombinedInfo.BasePointers.push_back(CV);
9448       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9449         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9450             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9451             AlignmentSource::Decl));
9452         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9453       } else {
9454         CombinedInfo.Pointers.push_back(CV);
9455       }
9456       if (I != FirstPrivateDecls.end())
9457         IsImplicit = I->getSecond();
9458     }
9459     // Every default map produces a single argument which is a target parameter.
9460     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9461 
9462     // Add flag stating this is an implicit map.
9463     if (IsImplicit)
9464       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9465 
9466     // No user-defined mapper for default mapping.
9467     CombinedInfo.Mappers.push_back(nullptr);
9468   }
9469 };
9470 } // anonymous namespace
9471 
9472 static void emitNonContiguousDescriptor(
9473     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9474     CGOpenMPRuntime::TargetDataInfo &Info) {
9475   CodeGenModule &CGM = CGF.CGM;
9476   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9477       &NonContigInfo = CombinedInfo.NonContigInfo;
9478 
9479   // Build an array of struct descriptor_dim and then assign it to
9480   // offload_args.
9481   //
9482   // struct descriptor_dim {
9483   //  uint64_t offset;
9484   //  uint64_t count;
9485   //  uint64_t stride
9486   // };
9487   ASTContext &C = CGF.getContext();
9488   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9489   RecordDecl *RD;
9490   RD = C.buildImplicitRecord("descriptor_dim");
9491   RD->startDefinition();
9492   addFieldToRecordDecl(C, RD, Int64Ty);
9493   addFieldToRecordDecl(C, RD, Int64Ty);
9494   addFieldToRecordDecl(C, RD, Int64Ty);
9495   RD->completeDefinition();
9496   QualType DimTy = C.getRecordType(RD);
9497 
9498   enum { OffsetFD = 0, CountFD, StrideFD };
9499   // We need two index variable here since the size of "Dims" is the same as the
9500   // size of Components, however, the size of offset, count, and stride is equal
9501   // to the size of base declaration that is non-contiguous.
9502   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9503     // Skip emitting ir if dimension size is 1 since it cannot be
9504     // non-contiguous.
9505     if (NonContigInfo.Dims[I] == 1)
9506       continue;
9507     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9508     QualType ArrayTy =
9509         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9510     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9511     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9512       unsigned RevIdx = EE - II - 1;
9513       LValue DimsLVal = CGF.MakeAddrLValue(
9514           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9515       // Offset
9516       LValue OffsetLVal = CGF.EmitLValueForField(
9517           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9518       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9519       // Count
9520       LValue CountLVal = CGF.EmitLValueForField(
9521           DimsLVal, *std::next(RD->field_begin(), CountFD));
9522       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9523       // Stride
9524       LValue StrideLVal = CGF.EmitLValueForField(
9525           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9526       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9527     }
9528     // args[I] = &dims
9529     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9530         DimsAddr, CGM.Int8PtrTy);
9531     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9532         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9533         Info.PointersArray, 0, I);
9534     Address PAddr(P, CGF.getPointerAlign());
9535     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9536     ++L;
9537   }
9538 }
9539 
9540 // Try to extract the base declaration from a `this->x` expression if possible.
9541 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9542   if (!E)
9543     return nullptr;
9544 
9545   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9546     if (const MemberExpr *ME =
9547             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9548       return ME->getMemberDecl();
9549   return nullptr;
9550 }
9551 
9552 /// Emit a string constant containing the names of the values mapped to the
9553 /// offloading runtime library.
9554 llvm::Constant *
9555 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9556                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9557 
9558   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9559     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9560 
9561   SourceLocation Loc;
9562   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9563     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9564       Loc = VD->getLocation();
9565     else
9566       Loc = MapExprs.getMapExpr()->getExprLoc();
9567   } else {
9568     Loc = MapExprs.getMapDecl()->getLocation();
9569   }
9570 
9571   std::string ExprName = "";
9572   if (MapExprs.getMapExpr()) {
9573     PrintingPolicy P(CGF.getContext().getLangOpts());
9574     llvm::raw_string_ostream OS(ExprName);
9575     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9576     OS.flush();
9577   } else {
9578     ExprName = MapExprs.getMapDecl()->getNameAsString();
9579   }
9580 
9581   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9582   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9583                                          PLoc.getLine(), PLoc.getColumn());
9584 }
9585 
9586 /// Emit the arrays used to pass the captures and map information to the
9587 /// offloading runtime library. If there is no map or capture information,
9588 /// return nullptr by reference.
9589 static void emitOffloadingArrays(
9590     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9591     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9592     bool IsNonContiguous = false) {
9593   CodeGenModule &CGM = CGF.CGM;
9594   ASTContext &Ctx = CGF.getContext();
9595 
9596   // Reset the array information.
9597   Info.clearArrayInfo();
9598   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9599 
9600   if (Info.NumberOfPtrs) {
9601     // Detect if we have any capture size requiring runtime evaluation of the
9602     // size so that a constant array could be eventually used.
9603     bool hasRuntimeEvaluationCaptureSize = false;
9604     for (llvm::Value *S : CombinedInfo.Sizes)
9605       if (!isa<llvm::Constant>(S)) {
9606         hasRuntimeEvaluationCaptureSize = true;
9607         break;
9608       }
9609 
9610     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9611     QualType PointerArrayType = Ctx.getConstantArrayType(
9612         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9613         /*IndexTypeQuals=*/0);
9614 
9615     Info.BasePointersArray =
9616         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9617     Info.PointersArray =
9618         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9619     Address MappersArray =
9620         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9621     Info.MappersArray = MappersArray.getPointer();
9622 
9623     // If we don't have any VLA types or other types that require runtime
9624     // evaluation, we can use a constant array for the map sizes, otherwise we
9625     // need to fill up the arrays as we do for the pointers.
9626     QualType Int64Ty =
9627         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9628     if (hasRuntimeEvaluationCaptureSize) {
9629       QualType SizeArrayType = Ctx.getConstantArrayType(
9630           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9631           /*IndexTypeQuals=*/0);
9632       Info.SizesArray =
9633           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9634     } else {
9635       // We expect all the sizes to be constant, so we collect them to create
9636       // a constant array.
9637       SmallVector<llvm::Constant *, 16> ConstSizes;
9638       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9639         if (IsNonContiguous &&
9640             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9641           ConstSizes.push_back(llvm::ConstantInt::get(
9642               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9643         } else {
9644           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9645         }
9646       }
9647 
9648       auto *SizesArrayInit = llvm::ConstantArray::get(
9649           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9650       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9651       auto *SizesArrayGbl = new llvm::GlobalVariable(
9652           CGM.getModule(), SizesArrayInit->getType(),
9653           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9654           SizesArrayInit, Name);
9655       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9656       Info.SizesArray = SizesArrayGbl;
9657     }
9658 
9659     // The map types are always constant so we don't need to generate code to
9660     // fill arrays. Instead, we create an array constant.
9661     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9662     llvm::copy(CombinedInfo.Types, Mapping.begin());
9663     std::string MaptypesName =
9664         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9665     auto *MapTypesArrayGbl =
9666         OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9667     Info.MapTypesArray = MapTypesArrayGbl;
9668 
9669     // The information types are only built if there is debug information
9670     // requested.
9671     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9672       Info.MapNamesArray = llvm::Constant::getNullValue(
9673           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9674     } else {
9675       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9676         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9677       };
9678       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9679       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9680       std::string MapnamesName =
9681           CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9682       auto *MapNamesArrayGbl =
9683           OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9684       Info.MapNamesArray = MapNamesArrayGbl;
9685     }
9686 
9687     // If there's a present map type modifier, it must not be applied to the end
9688     // of a region, so generate a separate map type array in that case.
9689     if (Info.separateBeginEndCalls()) {
9690       bool EndMapTypesDiffer = false;
9691       for (uint64_t &Type : Mapping) {
9692         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9693           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9694           EndMapTypesDiffer = true;
9695         }
9696       }
9697       if (EndMapTypesDiffer) {
9698         MapTypesArrayGbl =
9699             OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9700         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9701       }
9702     }
9703 
9704     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9705       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9706       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9707           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9708           Info.BasePointersArray, 0, I);
9709       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9710           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9711       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9712       CGF.Builder.CreateStore(BPVal, BPAddr);
9713 
9714       if (Info.requiresDevicePointerInfo())
9715         if (const ValueDecl *DevVD =
9716                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9717           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9718 
9719       llvm::Value *PVal = CombinedInfo.Pointers[I];
9720       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9721           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9722           Info.PointersArray, 0, I);
9723       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9724           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9725       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9726       CGF.Builder.CreateStore(PVal, PAddr);
9727 
9728       if (hasRuntimeEvaluationCaptureSize) {
9729         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9730             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9731             Info.SizesArray,
9732             /*Idx0=*/0,
9733             /*Idx1=*/I);
9734         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9735         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9736                                                           CGM.Int64Ty,
9737                                                           /*isSigned=*/true),
9738                                 SAddr);
9739       }
9740 
9741       // Fill up the mapper array.
9742       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9743       if (CombinedInfo.Mappers[I]) {
9744         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9745             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9746         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9747         Info.HasMapper = true;
9748       }
9749       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9750       CGF.Builder.CreateStore(MFunc, MAddr);
9751     }
9752   }
9753 
9754   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9755       Info.NumberOfPtrs == 0)
9756     return;
9757 
9758   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9759 }
9760 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// Doubles as the default constructor (ForEndCall == false). It is not
  /// explicit, so a bool or a braced bool converts implicitly.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}

  /// Whether the arguments are emitted for the call that ends the region.
  bool ForEndCall;
};
} // namespace
9769 
9770 /// Emit the arguments to be passed to the runtime library based on the
9771 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9772 /// ForEndCall, emit map types to be passed for the end of the region instead of
9773 /// the beginning.
9774 static void emitOffloadingArraysArgument(
9775     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9776     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9777     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9778     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9779     const ArgumentsOptions &Options = ArgumentsOptions()) {
9780   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9781          "expected region end call to runtime only when end call is separate");
9782   CodeGenModule &CGM = CGF.CGM;
9783   if (Info.NumberOfPtrs) {
9784     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9785         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9786         Info.BasePointersArray,
9787         /*Idx0=*/0, /*Idx1=*/0);
9788     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9789         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9790         Info.PointersArray,
9791         /*Idx0=*/0,
9792         /*Idx1=*/0);
9793     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9794         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9795         /*Idx0=*/0, /*Idx1=*/0);
9796     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9797         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9798         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9799                                                     : Info.MapTypesArray,
9800         /*Idx0=*/0,
9801         /*Idx1=*/0);
9802 
9803     // Only emit the mapper information arrays if debug information is
9804     // requested.
9805     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9806       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9807     else
9808       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9809           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9810           Info.MapNamesArray,
9811           /*Idx0=*/0,
9812           /*Idx1=*/0);
9813     // If there is no user-defined mapper, set the mapper array to nullptr to
9814     // avoid an unnecessary data privatization
9815     if (!Info.HasMapper)
9816       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9817     else
9818       MappersArrayArg =
9819           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9820   } else {
9821     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9822     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9823     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9824     MapTypesArrayArg =
9825         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9826     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9827     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9828   }
9829 }
9830 
9831 /// Check for inner distribute directive.
9832 static const OMPExecutableDirective *
9833 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9834   const auto *CS = D.getInnermostCapturedStmt();
9835   const auto *Body =
9836       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9837   const Stmt *ChildStmt =
9838       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9839 
9840   if (const auto *NestedDir =
9841           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9842     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9843     switch (D.getDirectiveKind()) {
9844     case OMPD_target:
9845       if (isOpenMPDistributeDirective(DKind))
9846         return NestedDir;
9847       if (DKind == OMPD_teams) {
9848         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9849             /*IgnoreCaptured=*/true);
9850         if (!Body)
9851           return nullptr;
9852         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9853         if (const auto *NND =
9854                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9855           DKind = NND->getDirectiveKind();
9856           if (isOpenMPDistributeDirective(DKind))
9857             return NND;
9858         }
9859       }
9860       return nullptr;
9861     case OMPD_target_teams:
9862       if (isOpenMPDistributeDirective(DKind))
9863         return NestedDir;
9864       return nullptr;
9865     case OMPD_target_parallel:
9866     case OMPD_target_simd:
9867     case OMPD_target_parallel_for:
9868     case OMPD_target_parallel_for_simd:
9869       return nullptr;
9870     case OMPD_target_teams_distribute:
9871     case OMPD_target_teams_distribute_simd:
9872     case OMPD_target_teams_distribute_parallel_for:
9873     case OMPD_target_teams_distribute_parallel_for_simd:
9874     case OMPD_parallel:
9875     case OMPD_for:
9876     case OMPD_parallel_for:
9877     case OMPD_parallel_master:
9878     case OMPD_parallel_sections:
9879     case OMPD_for_simd:
9880     case OMPD_parallel_for_simd:
9881     case OMPD_cancel:
9882     case OMPD_cancellation_point:
9883     case OMPD_ordered:
9884     case OMPD_threadprivate:
9885     case OMPD_allocate:
9886     case OMPD_task:
9887     case OMPD_simd:
9888     case OMPD_tile:
9889     case OMPD_unroll:
9890     case OMPD_sections:
9891     case OMPD_section:
9892     case OMPD_single:
9893     case OMPD_master:
9894     case OMPD_critical:
9895     case OMPD_taskyield:
9896     case OMPD_barrier:
9897     case OMPD_taskwait:
9898     case OMPD_taskgroup:
9899     case OMPD_atomic:
9900     case OMPD_flush:
9901     case OMPD_depobj:
9902     case OMPD_scan:
9903     case OMPD_teams:
9904     case OMPD_target_data:
9905     case OMPD_target_exit_data:
9906     case OMPD_target_enter_data:
9907     case OMPD_distribute:
9908     case OMPD_distribute_simd:
9909     case OMPD_distribute_parallel_for:
9910     case OMPD_distribute_parallel_for_simd:
9911     case OMPD_teams_distribute:
9912     case OMPD_teams_distribute_simd:
9913     case OMPD_teams_distribute_parallel_for:
9914     case OMPD_teams_distribute_parallel_for_simd:
9915     case OMPD_target_update:
9916     case OMPD_declare_simd:
9917     case OMPD_declare_variant:
9918     case OMPD_begin_declare_variant:
9919     case OMPD_end_declare_variant:
9920     case OMPD_declare_target:
9921     case OMPD_end_declare_target:
9922     case OMPD_declare_reduction:
9923     case OMPD_declare_mapper:
9924     case OMPD_taskloop:
9925     case OMPD_taskloop_simd:
9926     case OMPD_master_taskloop:
9927     case OMPD_master_taskloop_simd:
9928     case OMPD_parallel_master_taskloop:
9929     case OMPD_parallel_master_taskloop_simd:
9930     case OMPD_requires:
9931     case OMPD_metadirective:
9932     case OMPD_unknown:
9933     default:
9934       llvm_unreachable("Unexpected directive.");
9935     }
9936   }
9937 
9938   return nullptr;
9939 }
9940 
/// Emit the user-defined mapper function for the 'declare mapper' declaration
/// \a D and record it in UDMMap. If a function has already been recorded for
/// \a D, nothing is emitted. The code generation follows the pattern in the
/// example below. In the example, 'size' stands for the number of array
/// elements, i.e. the incoming size argument divided by sizeof(Ty).
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D The 'declare mapper' declaration to generate a function for.
/// \param CGF If non-null, \a D is additionally appended to the FunctionUDMMap
/// entry keyed by the function \a CGF is currently emitting (CGF->CurFn).
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each declaration is emitted at most once; UDMMap caches the result.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable named in the 'declare mapper' directive; it is rebound to the
  // current array element inside the mapping loop below.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // The six parameters are, in order: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is composed from "omp_mapper", the mangled name of the
  // mapped type, and the mapper identifier.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // The mapper function is never left marked 'optnone'.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  // A dedicated CodeGenFunction is used, so the caller's \a CGF (if any) is
  // not touched while the mapper body is emitted.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  // From here on, Size is an element count, not a byte count.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // [PtrBegin, PtrEnd) is the range of elements visited by the mapping loop.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  // HeadBB is the block control reaches when the initiation is skipped.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  // An empty range skips both the loop and the deletion code and goes straight
  // to the function exit.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // The block holding the branch above; it is the loop-entry predecessor used
  // for the PHI node in the body.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block the back edge is taken from. It remains BodyBB
  // when the mapper has no components, and becomes the last "omp.type.end"
  // block otherwise.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Move the pre-existing component count into the bit position returned by
  // getFlagMemberOffset() so it can be added to each component's map type.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // A mapping name is only emitted when debug info is enabled; otherwise a
    // null pointer is passed.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    // The shifted count of pre-existing components is added to the static map
    // type of this component.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    //
    // The decay is evaluated at run time as a chain of tests on the TO/FROM
    // bits of the incoming MapType; the four outcomes are merged by a PHI node
    // in "omp.type.end".
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    // NOTE(review): no explicit branch is created at the end of FromBB; this
    // presumably relies on EmitBlock to add the fall-through branch to EndBB.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible map types. The ToElseBB edge is the one taken
    // when neither the alloc, to, nor from test matched, so the member map
    // type is passed through unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: this array shadows the single-element OffloadingArgs declared
    // before the loop.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: the back edge into BodyBB comes from LastBB.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Publish the finished function, and remember which enclosing function (if
  // any) triggered its emission.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10221 
10222 /// Emit the array initialization or deletion portion for user-defined mapper
10223 /// code generation. First, it evaluates whether an array section is mapped and
10224 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10225 /// true, and \a MapType indicates to not delete this array, array
10226 /// initialization code is generated. If \a IsInit is false, and \a MapType
10227 /// indicates to not this array, array deletion code is generated.
10228 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10229     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10230     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10231     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10232     bool IsInit) {
10233   StringRef Prefix = IsInit ? ".init" : ".del";
10234 
10235   // Evaluate if this is an array section.
10236   llvm::BasicBlock *BodyBB =
10237       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10238   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10239       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10240   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10241       MapType,
10242       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10243   llvm::Value *DeleteCond;
10244   llvm::Value *Cond;
10245   if (IsInit) {
10246     // base != begin?
10247     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
10248         MapperCGF.Builder.CreatePtrDiff(Base, Begin));
10249     // IsPtrAndObj?
10250     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10251         MapType,
10252         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10253     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10254     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10255     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10256     DeleteCond = MapperCGF.Builder.CreateIsNull(
10257         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10258   } else {
10259     Cond = IsArray;
10260     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10261         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10262   }
10263   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10264   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10265 
10266   MapperCGF.EmitBlock(BodyBB);
10267   // Get the array size by multiplying element size and element number (i.e., \p
10268   // Size).
10269   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10270       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10271   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10272   // memory allocation/deletion purpose only.
10273   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10274       MapType,
10275       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10276                                    MappableExprsHandler::OMP_MAP_FROM)));
10277   MapTypeArg = MapperCGF.Builder.CreateOr(
10278       MapTypeArg,
10279       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10280 
10281   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10282   // data structure.
10283   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10284                                    ArraySize, MapTypeArg, MapName};
10285   MapperCGF.EmitRuntimeCall(
10286       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10287                                             OMPRTL___tgt_push_mapper_component),
10288       OffloadingArgs);
10289 }
10290 
10291 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10292     const OMPDeclareMapperDecl *D) {
10293   auto I = UDMMap.find(D);
10294   if (I != UDMMap.end())
10295     return I->second;
10296   emitUserDefinedMapper(D);
10297   return UDMMap.lookup(D);
10298 }
10299 
10300 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10301     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10302     llvm::Value *DeviceID,
10303     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10304                                      const OMPLoopDirective &D)>
10305         SizeEmitter) {
10306   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10307   const OMPExecutableDirective *TD = &D;
10308   // Get nested teams distribute kind directive, if any.
10309   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10310     TD = getNestedDistributeDirective(CGM.getContext(), D);
10311   if (!TD)
10312     return;
10313   const auto *LD = cast<OMPLoopDirective>(TD);
10314   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10315                                                          PrePostActionTy &) {
10316     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10317       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10318       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10319       CGF.EmitRuntimeCall(
10320           OMPBuilder.getOrCreateRuntimeFunction(
10321               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10322           Args);
10323     }
10324   };
10325   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10326 }
10327 
10328 void CGOpenMPRuntime::emitTargetCall(
10329     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10330     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10331     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10332     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10333                                      const OMPLoopDirective &D)>
10334         SizeEmitter) {
10335   if (!CGF.HaveInsertPoint())
10336     return;
10337 
10338   assert(OutlinedFn && "Invalid outlined function!");
10339 
10340   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10341                                  D.hasClausesOfKind<OMPNowaitClause>();
10342   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10343   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10344   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10345                                             PrePostActionTy &) {
10346     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10347   };
10348   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10349 
10350   CodeGenFunction::OMPTargetDataInfo InputInfo;
10351   llvm::Value *MapTypesArray = nullptr;
10352   llvm::Value *MapNamesArray = nullptr;
10353   // Fill up the pointer arrays and transfer execution to the device.
10354   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10355                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10356                     &CapturedVars,
10357                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10358     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10359       // Reverse offloading is not supported, so just execute on the host.
10360       if (RequiresOuterTask) {
10361         CapturedVars.clear();
10362         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10363       }
10364       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10365       return;
10366     }
10367 
10368     // On top of the arrays that were filled up, the target offloading call
10369     // takes as arguments the device id as well as the host pointer. The host
10370     // pointer is used by the runtime library to identify the current target
10371     // region, so it only has to be unique and not necessarily point to
10372     // anything. It could be the pointer to the outlined function that
10373     // implements the target region, but we aren't using that so that the
10374     // compiler doesn't need to keep that, and could therefore inline the host
10375     // function if proven worthwhile during optimization.
10376 
10377     // From this point on, we need to have an ID of the target region defined.
10378     assert(OutlinedFnID && "Invalid outlined function ID!");
10379 
10380     // Emit device ID if any.
10381     llvm::Value *DeviceID;
10382     if (Device.getPointer()) {
10383       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10384               Device.getInt() == OMPC_DEVICE_device_num) &&
10385              "Expected device_num modifier.");
10386       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10387       DeviceID =
10388           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10389     } else {
10390       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10391     }
10392 
10393     // Emit the number of elements in the offloading arrays.
10394     llvm::Value *PointerNum =
10395         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10396 
10397     // Return value of the runtime offloading call.
10398     llvm::Value *Return;
10399 
10400     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10401     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10402 
10403     // Source location for the ident struct
10404     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10405 
10406     // Emit tripcount for the target loop-based directive.
10407     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10408 
10409     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10410     // The target region is an outlined function launched by the runtime
10411     // via calls __tgt_target() or __tgt_target_teams().
10412     //
10413     // __tgt_target() launches a target region with one team and one thread,
10414     // executing a serial region.  This master thread may in turn launch
10415     // more threads within its team upon encountering a parallel region,
10416     // however, no additional teams can be launched on the device.
10417     //
10418     // __tgt_target_teams() launches a target region with one or more teams,
10419     // each with one or more threads.  This call is required for target
10420     // constructs such as:
10421     //  'target teams'
10422     //  'target' / 'teams'
10423     //  'target teams distribute parallel for'
10424     //  'target parallel'
10425     // and so on.
10426     //
10427     // Note that on the host and CPU targets, the runtime implementation of
10428     // these calls simply call the outlined function without forking threads.
10429     // The outlined functions themselves have runtime calls to
10430     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10431     // the compiler in emitTeamsCall() and emitParallelCall().
10432     //
10433     // In contrast, on the NVPTX target, the implementation of
10434     // __tgt_target_teams() launches a GPU kernel with the requested number
10435     // of teams and threads so no additional calls to the runtime are required.
10436     if (NumTeams) {
10437       // If we have NumTeams defined this means that we have an enclosed teams
10438       // region. Therefore we also expect to have NumThreads defined. These two
10439       // values should be defined in the presence of a teams directive,
10440       // regardless of having any clauses associated. If the user is using teams
10441       // but no clauses, these two values will be the default that should be
10442       // passed to the runtime library - a 32-bit integer with the value zero.
10443       assert(NumThreads && "Thread limit expression should be available along "
10444                            "with number of teams.");
10445       SmallVector<llvm::Value *> OffloadingArgs = {
10446           RTLoc,
10447           DeviceID,
10448           OutlinedFnID,
10449           PointerNum,
10450           InputInfo.BasePointersArray.getPointer(),
10451           InputInfo.PointersArray.getPointer(),
10452           InputInfo.SizesArray.getPointer(),
10453           MapTypesArray,
10454           MapNamesArray,
10455           InputInfo.MappersArray.getPointer(),
10456           NumTeams,
10457           NumThreads};
10458       if (HasNowait) {
10459         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10460         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10461         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10462         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10463         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10464         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10465       }
10466       Return = CGF.EmitRuntimeCall(
10467           OMPBuilder.getOrCreateRuntimeFunction(
10468               CGM.getModule(), HasNowait
10469                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10470                                    : OMPRTL___tgt_target_teams_mapper),
10471           OffloadingArgs);
10472     } else {
10473       SmallVector<llvm::Value *> OffloadingArgs = {
10474           RTLoc,
10475           DeviceID,
10476           OutlinedFnID,
10477           PointerNum,
10478           InputInfo.BasePointersArray.getPointer(),
10479           InputInfo.PointersArray.getPointer(),
10480           InputInfo.SizesArray.getPointer(),
10481           MapTypesArray,
10482           MapNamesArray,
10483           InputInfo.MappersArray.getPointer()};
10484       if (HasNowait) {
10485         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10486         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10487         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10488         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10489         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10490         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10491       }
10492       Return = CGF.EmitRuntimeCall(
10493           OMPBuilder.getOrCreateRuntimeFunction(
10494               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10495                                          : OMPRTL___tgt_target_mapper),
10496           OffloadingArgs);
10497     }
10498 
10499     // Check the error code and execute the host version if required.
10500     llvm::BasicBlock *OffloadFailedBlock =
10501         CGF.createBasicBlock("omp_offload.failed");
10502     llvm::BasicBlock *OffloadContBlock =
10503         CGF.createBasicBlock("omp_offload.cont");
10504     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10505     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10506 
10507     CGF.EmitBlock(OffloadFailedBlock);
10508     if (RequiresOuterTask) {
10509       CapturedVars.clear();
10510       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10511     }
10512     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10513     CGF.EmitBranch(OffloadContBlock);
10514 
10515     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10516   };
10517 
10518   // Notify that the host version must be executed.
10519   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10520                     RequiresOuterTask](CodeGenFunction &CGF,
10521                                        PrePostActionTy &) {
10522     if (RequiresOuterTask) {
10523       CapturedVars.clear();
10524       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10525     }
10526     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10527   };
10528 
10529   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10530                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10531                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10532     // Fill up the arrays with all the captured variables.
10533     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10534 
10535     // Get mappable expression information.
10536     MappableExprsHandler MEHandler(D, CGF);
10537     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10538     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10539 
10540     auto RI = CS.getCapturedRecordDecl()->field_begin();
10541     auto *CV = CapturedVars.begin();
10542     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10543                                               CE = CS.capture_end();
10544          CI != CE; ++CI, ++RI, ++CV) {
10545       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10546       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10547 
10548       // VLA sizes are passed to the outlined region by copy and do not have map
10549       // information associated.
10550       if (CI->capturesVariableArrayType()) {
10551         CurInfo.Exprs.push_back(nullptr);
10552         CurInfo.BasePointers.push_back(*CV);
10553         CurInfo.Pointers.push_back(*CV);
10554         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10555             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10556         // Copy to the device as an argument. No need to retrieve it.
10557         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10558                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10559                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10560         CurInfo.Mappers.push_back(nullptr);
10561       } else {
10562         // If we have any information in the map clause, we use it, otherwise we
10563         // just do a default mapping.
10564         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10565         if (!CI->capturesThis())
10566           MappedVarSet.insert(CI->getCapturedVar());
10567         else
10568           MappedVarSet.insert(nullptr);
10569         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10570           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10571         // Generate correct mapping for variables captured by reference in
10572         // lambdas.
10573         if (CI->capturesVariable())
10574           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10575                                                   CurInfo, LambdaPointers);
10576       }
10577       // We expect to have at least an element of information for this capture.
10578       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10579              "Non-existing map pointer for capture!");
10580       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10581              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10582              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10583              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10584              "Inconsistent map information sizes!");
10585 
10586       // If there is an entry in PartialStruct it means we have a struct with
10587       // individual members mapped. Emit an extra combined entry.
10588       if (PartialStruct.Base.isValid()) {
10589         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10590         MEHandler.emitCombinedEntry(
10591             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10592             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10593       }
10594 
10595       // We need to append the results of this capture to what we already have.
10596       CombinedInfo.append(CurInfo);
10597     }
10598     // Adjust MEMBER_OF flags for the lambdas captures.
10599     MEHandler.adjustMemberOfForLambdaCaptures(
10600         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10601         CombinedInfo.Types);
10602     // Map any list items in a map clause that were not captures because they
10603     // weren't referenced within the construct.
10604     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10605 
10606     TargetDataInfo Info;
10607     // Fill up the arrays and create the arguments.
10608     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10609     emitOffloadingArraysArgument(
10610         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10611         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10612         {/*ForEndTask=*/false});
10613 
10614     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10615     InputInfo.BasePointersArray =
10616         Address(Info.BasePointersArray, CGM.getPointerAlign());
10617     InputInfo.PointersArray =
10618         Address(Info.PointersArray, CGM.getPointerAlign());
10619     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10620     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10621     MapTypesArray = Info.MapTypesArray;
10622     MapNamesArray = Info.MapNamesArray;
10623     if (RequiresOuterTask)
10624       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10625     else
10626       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10627   };
10628 
10629   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10630                              CodeGenFunction &CGF, PrePostActionTy &) {
10631     if (RequiresOuterTask) {
10632       CodeGenFunction::OMPTargetDataInfo InputInfo;
10633       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10634     } else {
10635       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10636     }
10637   };
10638 
  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
10643   if (OutlinedFnID) {
10644     if (IfCond) {
10645       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10646     } else {
10647       RegionCodeGenTy ThenRCG(TargetThenGen);
10648       ThenRCG(CGF);
10649     }
10650   } else {
10651     RegionCodeGenTy ElseRCG(TargetElseGen);
10652     ElseRCG(CGF);
10653   }
10654 }
10655 
10656 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10657                                                     StringRef ParentName) {
10658   if (!S)
10659     return;
10660 
10661   // Codegen OMP target directives that offload compute to the device.
10662   bool RequiresDeviceCodegen =
10663       isa<OMPExecutableDirective>(S) &&
10664       isOpenMPTargetExecutionDirective(
10665           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10666 
10667   if (RequiresDeviceCodegen) {
10668     const auto &E = *cast<OMPExecutableDirective>(S);
10669     unsigned DeviceID;
10670     unsigned FileID;
10671     unsigned Line;
10672     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10673                              FileID, Line);
10674 
10675     // Is this a target region that should not be emitted as an entry point? If
10676     // so just signal we are done with this target region.
10677     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10678                                                             ParentName, Line))
10679       return;
10680 
10681     switch (E.getDirectiveKind()) {
10682     case OMPD_target:
10683       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10684                                                    cast<OMPTargetDirective>(E));
10685       break;
10686     case OMPD_target_parallel:
10687       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10688           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10689       break;
10690     case OMPD_target_teams:
10691       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10692           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10693       break;
10694     case OMPD_target_teams_distribute:
10695       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10696           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10697       break;
10698     case OMPD_target_teams_distribute_simd:
10699       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10700           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10701       break;
10702     case OMPD_target_parallel_for:
10703       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10704           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10705       break;
10706     case OMPD_target_parallel_for_simd:
10707       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10708           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10709       break;
10710     case OMPD_target_simd:
10711       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10712           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10713       break;
10714     case OMPD_target_teams_distribute_parallel_for:
10715       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10716           CGM, ParentName,
10717           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10718       break;
10719     case OMPD_target_teams_distribute_parallel_for_simd:
10720       CodeGenFunction::
10721           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10722               CGM, ParentName,
10723               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10724       break;
10725     case OMPD_parallel:
10726     case OMPD_for:
10727     case OMPD_parallel_for:
10728     case OMPD_parallel_master:
10729     case OMPD_parallel_sections:
10730     case OMPD_for_simd:
10731     case OMPD_parallel_for_simd:
10732     case OMPD_cancel:
10733     case OMPD_cancellation_point:
10734     case OMPD_ordered:
10735     case OMPD_threadprivate:
10736     case OMPD_allocate:
10737     case OMPD_task:
10738     case OMPD_simd:
10739     case OMPD_tile:
10740     case OMPD_unroll:
10741     case OMPD_sections:
10742     case OMPD_section:
10743     case OMPD_single:
10744     case OMPD_master:
10745     case OMPD_critical:
10746     case OMPD_taskyield:
10747     case OMPD_barrier:
10748     case OMPD_taskwait:
10749     case OMPD_taskgroup:
10750     case OMPD_atomic:
10751     case OMPD_flush:
10752     case OMPD_depobj:
10753     case OMPD_scan:
10754     case OMPD_teams:
10755     case OMPD_target_data:
10756     case OMPD_target_exit_data:
10757     case OMPD_target_enter_data:
10758     case OMPD_distribute:
10759     case OMPD_distribute_simd:
10760     case OMPD_distribute_parallel_for:
10761     case OMPD_distribute_parallel_for_simd:
10762     case OMPD_teams_distribute:
10763     case OMPD_teams_distribute_simd:
10764     case OMPD_teams_distribute_parallel_for:
10765     case OMPD_teams_distribute_parallel_for_simd:
10766     case OMPD_target_update:
10767     case OMPD_declare_simd:
10768     case OMPD_declare_variant:
10769     case OMPD_begin_declare_variant:
10770     case OMPD_end_declare_variant:
10771     case OMPD_declare_target:
10772     case OMPD_end_declare_target:
10773     case OMPD_declare_reduction:
10774     case OMPD_declare_mapper:
10775     case OMPD_taskloop:
10776     case OMPD_taskloop_simd:
10777     case OMPD_master_taskloop:
10778     case OMPD_master_taskloop_simd:
10779     case OMPD_parallel_master_taskloop:
10780     case OMPD_parallel_master_taskloop_simd:
10781     case OMPD_requires:
10782     case OMPD_metadirective:
10783     case OMPD_unknown:
10784     default:
10785       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10786     }
10787     return;
10788   }
10789 
10790   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10791     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10792       return;
10793 
10794     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10795     return;
10796   }
10797 
10798   // If this is a lambda function, look into its body.
10799   if (const auto *L = dyn_cast<LambdaExpr>(S))
10800     S = L->getBody();
10801 
10802   // Keep looking for target regions recursively.
10803   for (const Stmt *II : S->children())
10804     scanForTargetRegionsFunctions(II, ParentName);
10805 }
10806 
10807 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10808   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10809       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10810   if (!DevTy)
10811     return false;
10812   // Do not emit device_type(nohost) functions for the host.
10813   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10814     return true;
10815   // Do not emit device_type(host) functions for the device.
10816   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10817     return true;
10818   return false;
10819 }
10820 
10821 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10822   // If emitting code for the host, we do not process FD here. Instead we do
10823   // the normal code generation.
10824   if (!CGM.getLangOpts().OpenMPIsDevice) {
10825     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10826       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10827                                   CGM.getLangOpts().OpenMPIsDevice))
10828         return true;
10829     return false;
10830   }
10831 
10832   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10833   // Try to detect target regions in the function.
10834   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10835     StringRef Name = CGM.getMangledName(GD);
10836     scanForTargetRegionsFunctions(FD->getBody(), Name);
10837     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10838                                 CGM.getLangOpts().OpenMPIsDevice))
10839       return true;
10840   }
10841 
10842   // Do not to emit function if it is not marked as declare target.
10843   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10844          AlreadyEmittedTargetDecls.count(VD) == 0;
10845 }
10846 
10847 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10848   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10849                               CGM.getLangOpts().OpenMPIsDevice))
10850     return true;
10851 
10852   if (!CGM.getLangOpts().OpenMPIsDevice)
10853     return false;
10854 
10855   // Check if there are Ctors/Dtors in this declaration and look for target
10856   // regions in it. We use the complete variant to produce the kernel name
10857   // mangling.
10858   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10859   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10860     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10861       StringRef ParentName =
10862           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10863       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10864     }
10865     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10866       StringRef ParentName =
10867           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10868       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10869     }
10870   }
10871 
10872   // Do not to emit variable if it is not marked as declare target.
10873   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10874       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10875           cast<VarDecl>(GD.getDecl()));
10876   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10877       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10878        HasRequiresUnifiedSharedMemory)) {
10879     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10880     return true;
10881   }
10882   return false;
10883 }
10884 
10885 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10886                                                    llvm::Constant *Addr) {
10887   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10888       !CGM.getLangOpts().OpenMPIsDevice)
10889     return;
10890 
10891   // If we have host/nohost variables, they do not need to be registered.
10892   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10893       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10894   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10895     return;
10896 
10897   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10898       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10899   if (!Res) {
10900     if (CGM.getLangOpts().OpenMPIsDevice) {
10901       // Register non-target variables being emitted in device code (debug info
10902       // may cause this).
10903       StringRef VarName = CGM.getMangledName(VD);
10904       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10905     }
10906     return;
10907   }
10908   // Register declare target variables.
10909   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10910   StringRef VarName;
10911   CharUnits VarSize;
10912   llvm::GlobalValue::LinkageTypes Linkage;
10913 
10914   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10915       !HasRequiresUnifiedSharedMemory) {
10916     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10917     VarName = CGM.getMangledName(VD);
10918     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10919       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10920       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10921     } else {
10922       VarSize = CharUnits::Zero();
10923     }
10924     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10925     // Temp solution to prevent optimizations of the internal variables.
10926     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10927       // Do not create a "ref-variable" if the original is not also available
10928       // on the host.
10929       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10930         return;
10931       std::string RefName = getName({VarName, "ref"});
10932       if (!CGM.GetGlobalValue(RefName)) {
10933         llvm::Constant *AddrRef =
10934             getOrCreateInternalVariable(Addr->getType(), RefName);
10935         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10936         GVAddrRef->setConstant(/*Val=*/true);
10937         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10938         GVAddrRef->setInitializer(Addr);
10939         CGM.addCompilerUsedGlobal(GVAddrRef);
10940       }
10941     }
10942   } else {
10943     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10944             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10945              HasRequiresUnifiedSharedMemory)) &&
10946            "Declare target attribute must link or to with unified memory.");
10947     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10948       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10949     else
10950       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10951 
10952     if (CGM.getLangOpts().OpenMPIsDevice) {
10953       VarName = Addr->getName();
10954       Addr = nullptr;
10955     } else {
10956       VarName = getAddrOfDeclareTargetVar(VD).getName();
10957       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10958     }
10959     VarSize = CGM.getPointerSize();
10960     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10961   }
10962 
10963   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10964       VarName, Addr, VarSize, Flags, Linkage);
10965 }
10966 
10967 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10968   if (isa<FunctionDecl>(GD.getDecl()) ||
10969       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10970     return emitTargetFunctions(GD);
10971 
10972   return emitTargetGlobalVariable(GD);
10973 }
10974 
10975 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10976   for (const VarDecl *VD : DeferredGlobalVariables) {
10977     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10978         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10979     if (!Res)
10980       continue;
10981     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10982         !HasRequiresUnifiedSharedMemory) {
10983       CGM.EmitGlobal(VD);
10984     } else {
10985       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10986               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10987                HasRequiresUnifiedSharedMemory)) &&
10988              "Expected link clause or to clause with unified memory.");
10989       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10990     }
10991   }
10992 }
10993 
10994 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10995     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10996   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10997          " Expected target-based directive.");
10998 }
10999 
11000 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
11001   for (const OMPClause *Clause : D->clauselists()) {
11002     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11003       HasRequiresUnifiedSharedMemory = true;
11004     } else if (const auto *AC =
11005                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11006       switch (AC->getAtomicDefaultMemOrderKind()) {
11007       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11008         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11009         break;
11010       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11011         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11012         break;
11013       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11014         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11015         break;
11016       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11017         break;
11018       }
11019     }
11020   }
11021 }
11022 
/// Return the atomic ordering stored in RequiresAtomicOrdering, which
/// processRequiresDirective updates from an atomic_default_mem_order clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11026 
11027 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11028                                                        LangAS &AS) {
11029   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11030     return false;
11031   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11032   switch(A->getAllocatorType()) {
11033   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11034   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11035   // Not supported, fallback to the default mem space.
11036   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11037   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11038   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11039   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11040   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11041   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11042   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11043     AS = LangAS::Default;
11044     return true;
11045   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11046     llvm_unreachable("Expected predefined allocator for the variables with the "
11047                      "static storage.");
11048   }
11049   return false;
11050 }
11051 
/// Return the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective sets when it sees a unified_shared_memory clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11055 
11056 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11057     CodeGenModule &CGM)
11058     : CGM(CGM) {
11059   if (CGM.getLangOpts().OpenMPIsDevice) {
11060     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11061     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11062   }
11063 }
11064 
11065 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11066   if (CGM.getLangOpts().OpenMPIsDevice)
11067     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11068 }
11069 
11070 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11071   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11072     return true;
11073 
11074   const auto *D = cast<FunctionDecl>(GD.getDecl());
11075   // Do not to emit function if it is marked as declare target as it was already
11076   // emitted.
11077   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11078     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11079       if (auto *F = dyn_cast_or_null<llvm::Function>(
11080               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11081         return !F->isDeclaration();
11082       return false;
11083     }
11084     return true;
11085   }
11086 
11087   return !AlreadyEmittedTargetDecls.insert(D).second;
11088 }
11089 
11090 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11091   // If we don't have entries or if we are emitting code for the device, we
11092   // don't need to do anything.
11093   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11094       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11095       (OffloadEntriesInfoManager.empty() &&
11096        !HasEmittedDeclareTargetRegion &&
11097        !HasEmittedTargetRegion))
11098     return nullptr;
11099 
11100   // Create and register the function that handles the requires directives.
11101   ASTContext &C = CGM.getContext();
11102 
11103   llvm::Function *RequiresRegFn;
11104   {
11105     CodeGenFunction CGF(CGM);
11106     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11107     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11108     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11109     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11110     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11111     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11112     // TODO: check for other requires clauses.
11113     // The requires directive takes effect only when a target region is
11114     // present in the compilation unit. Otherwise it is ignored and not
11115     // passed to the runtime. This avoids the runtime from throwing an error
11116     // for mismatching requires clauses across compilation units that don't
11117     // contain at least 1 target region.
11118     assert((HasEmittedTargetRegion ||
11119             HasEmittedDeclareTargetRegion ||
11120             !OffloadEntriesInfoManager.empty()) &&
11121            "Target or declare target region expected.");
11122     if (HasRequiresUnifiedSharedMemory)
11123       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11124     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11125                             CGM.getModule(), OMPRTL___tgt_register_requires),
11126                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11127     CGF.FinishFunction();
11128   }
11129   return RequiresRegFn;
11130 }
11131 
11132 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11133                                     const OMPExecutableDirective &D,
11134                                     SourceLocation Loc,
11135                                     llvm::Function *OutlinedFn,
11136                                     ArrayRef<llvm::Value *> CapturedVars) {
11137   if (!CGF.HaveInsertPoint())
11138     return;
11139 
11140   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11141   CodeGenFunction::RunCleanupsScope Scope(CGF);
11142 
11143   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11144   llvm::Value *Args[] = {
11145       RTLoc,
11146       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11147       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11148   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11149   RealArgs.append(std::begin(Args), std::end(Args));
11150   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11151 
11152   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11153       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11154   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11155 }
11156 
11157 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11158                                          const Expr *NumTeams,
11159                                          const Expr *ThreadLimit,
11160                                          SourceLocation Loc) {
11161   if (!CGF.HaveInsertPoint())
11162     return;
11163 
11164   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11165 
11166   llvm::Value *NumTeamsVal =
11167       NumTeams
11168           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11169                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11170           : CGF.Builder.getInt32(0);
11171 
11172   llvm::Value *ThreadLimitVal =
11173       ThreadLimit
11174           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11175                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11176           : CGF.Builder.getInt32(0);
11177 
11178   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11179   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11180                                      ThreadLimitVal};
11181   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11182                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11183                       PushNumTeamsArgs);
11184 }
11185 
11186 void CGOpenMPRuntime::emitTargetDataCalls(
11187     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11188     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11189   if (!CGF.HaveInsertPoint())
11190     return;
11191 
11192   // Action used to replace the default codegen action and turn privatization
11193   // off.
11194   PrePostActionTy NoPrivAction;
11195 
11196   // Generate the code for the opening of the data environment. Capture all the
11197   // arguments of the runtime call by reference because they are used in the
11198   // closing of the region.
11199   auto &&BeginThenGen = [this, &D, Device, &Info,
11200                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11201     // Fill up the arrays with all the mapped variables.
11202     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11203 
11204     // Get map clause information.
11205     MappableExprsHandler MEHandler(D, CGF);
11206     MEHandler.generateAllInfo(CombinedInfo);
11207 
11208     // Fill up the arrays and create the arguments.
11209     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11210                          /*IsNonContiguous=*/true);
11211 
11212     llvm::Value *BasePointersArrayArg = nullptr;
11213     llvm::Value *PointersArrayArg = nullptr;
11214     llvm::Value *SizesArrayArg = nullptr;
11215     llvm::Value *MapTypesArrayArg = nullptr;
11216     llvm::Value *MapNamesArrayArg = nullptr;
11217     llvm::Value *MappersArrayArg = nullptr;
11218     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11219                                  SizesArrayArg, MapTypesArrayArg,
11220                                  MapNamesArrayArg, MappersArrayArg, Info);
11221 
11222     // Emit device ID if any.
11223     llvm::Value *DeviceID = nullptr;
11224     if (Device) {
11225       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11226                                            CGF.Int64Ty, /*isSigned=*/true);
11227     } else {
11228       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11229     }
11230 
11231     // Emit the number of elements in the offloading arrays.
11232     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11233     //
11234     // Source location for the ident struct
11235     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11236 
11237     llvm::Value *OffloadingArgs[] = {RTLoc,
11238                                      DeviceID,
11239                                      PointerNum,
11240                                      BasePointersArrayArg,
11241                                      PointersArrayArg,
11242                                      SizesArrayArg,
11243                                      MapTypesArrayArg,
11244                                      MapNamesArrayArg,
11245                                      MappersArrayArg};
11246     CGF.EmitRuntimeCall(
11247         OMPBuilder.getOrCreateRuntimeFunction(
11248             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11249         OffloadingArgs);
11250 
11251     // If device pointer privatization is required, emit the body of the region
11252     // here. It will have to be duplicated: with and without privatization.
11253     if (!Info.CaptureDeviceAddrMap.empty())
11254       CodeGen(CGF);
11255   };
11256 
11257   // Generate code for the closing of the data region.
11258   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11259                                                 PrePostActionTy &) {
11260     assert(Info.isValid() && "Invalid data environment closing arguments.");
11261 
11262     llvm::Value *BasePointersArrayArg = nullptr;
11263     llvm::Value *PointersArrayArg = nullptr;
11264     llvm::Value *SizesArrayArg = nullptr;
11265     llvm::Value *MapTypesArrayArg = nullptr;
11266     llvm::Value *MapNamesArrayArg = nullptr;
11267     llvm::Value *MappersArrayArg = nullptr;
11268     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11269                                  SizesArrayArg, MapTypesArrayArg,
11270                                  MapNamesArrayArg, MappersArrayArg, Info,
11271                                  {/*ForEndCall=*/true});
11272 
11273     // Emit device ID if any.
11274     llvm::Value *DeviceID = nullptr;
11275     if (Device) {
11276       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11277                                            CGF.Int64Ty, /*isSigned=*/true);
11278     } else {
11279       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11280     }
11281 
11282     // Emit the number of elements in the offloading arrays.
11283     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11284 
11285     // Source location for the ident struct
11286     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11287 
11288     llvm::Value *OffloadingArgs[] = {RTLoc,
11289                                      DeviceID,
11290                                      PointerNum,
11291                                      BasePointersArrayArg,
11292                                      PointersArrayArg,
11293                                      SizesArrayArg,
11294                                      MapTypesArrayArg,
11295                                      MapNamesArrayArg,
11296                                      MappersArrayArg};
11297     CGF.EmitRuntimeCall(
11298         OMPBuilder.getOrCreateRuntimeFunction(
11299             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11300         OffloadingArgs);
11301   };
11302 
11303   // If we need device pointer privatization, we need to emit the body of the
11304   // region with no privatization in the 'else' branch of the conditional.
11305   // Otherwise, we don't have to do anything.
11306   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11307                                                          PrePostActionTy &) {
11308     if (!Info.CaptureDeviceAddrMap.empty()) {
11309       CodeGen.setAction(NoPrivAction);
11310       CodeGen(CGF);
11311     }
11312   };
11313 
11314   // We don't have to do anything to close the region if the if clause evaluates
11315   // to false.
11316   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11317 
11318   if (IfCond) {
11319     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11320   } else {
11321     RegionCodeGenTy RCG(BeginThenGen);
11322     RCG(CGF);
11323   }
11324 
11325   // If we don't require privatization of device pointers, we emit the body in
11326   // between the runtime calls. This avoids duplicating the body code.
11327   if (Info.CaptureDeviceAddrMap.empty()) {
11328     CodeGen.setAction(NoPrivAction);
11329     CodeGen(CGF);
11330   }
11331 
11332   if (IfCond) {
11333     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11334   } else {
11335     RegionCodeGenTy RCG(EndThenGen);
11336     RCG(CGF);
11337   }
11338 }
11339 
11340 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11341     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11342     const Expr *Device) {
11343   if (!CGF.HaveInsertPoint())
11344     return;
11345 
11346   assert((isa<OMPTargetEnterDataDirective>(D) ||
11347           isa<OMPTargetExitDataDirective>(D) ||
11348           isa<OMPTargetUpdateDirective>(D)) &&
11349          "Expecting either target enter, exit data, or update directives.");
11350 
11351   CodeGenFunction::OMPTargetDataInfo InputInfo;
11352   llvm::Value *MapTypesArray = nullptr;
11353   llvm::Value *MapNamesArray = nullptr;
11354   // Generate the code for the opening of the data environment.
11355   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11356                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11357     // Emit device ID if any.
11358     llvm::Value *DeviceID = nullptr;
11359     if (Device) {
11360       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11361                                            CGF.Int64Ty, /*isSigned=*/true);
11362     } else {
11363       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11364     }
11365 
11366     // Emit the number of elements in the offloading arrays.
11367     llvm::Constant *PointerNum =
11368         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11369 
11370     // Source location for the ident struct
11371     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11372 
11373     llvm::Value *OffloadingArgs[] = {RTLoc,
11374                                      DeviceID,
11375                                      PointerNum,
11376                                      InputInfo.BasePointersArray.getPointer(),
11377                                      InputInfo.PointersArray.getPointer(),
11378                                      InputInfo.SizesArray.getPointer(),
11379                                      MapTypesArray,
11380                                      MapNamesArray,
11381                                      InputInfo.MappersArray.getPointer()};
11382 
11383     // Select the right runtime function call for each standalone
11384     // directive.
11385     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11386     RuntimeFunction RTLFn;
11387     switch (D.getDirectiveKind()) {
11388     case OMPD_target_enter_data:
11389       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11390                         : OMPRTL___tgt_target_data_begin_mapper;
11391       break;
11392     case OMPD_target_exit_data:
11393       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11394                         : OMPRTL___tgt_target_data_end_mapper;
11395       break;
11396     case OMPD_target_update:
11397       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11398                         : OMPRTL___tgt_target_data_update_mapper;
11399       break;
11400     case OMPD_parallel:
11401     case OMPD_for:
11402     case OMPD_parallel_for:
11403     case OMPD_parallel_master:
11404     case OMPD_parallel_sections:
11405     case OMPD_for_simd:
11406     case OMPD_parallel_for_simd:
11407     case OMPD_cancel:
11408     case OMPD_cancellation_point:
11409     case OMPD_ordered:
11410     case OMPD_threadprivate:
11411     case OMPD_allocate:
11412     case OMPD_task:
11413     case OMPD_simd:
11414     case OMPD_tile:
11415     case OMPD_unroll:
11416     case OMPD_sections:
11417     case OMPD_section:
11418     case OMPD_single:
11419     case OMPD_master:
11420     case OMPD_critical:
11421     case OMPD_taskyield:
11422     case OMPD_barrier:
11423     case OMPD_taskwait:
11424     case OMPD_taskgroup:
11425     case OMPD_atomic:
11426     case OMPD_flush:
11427     case OMPD_depobj:
11428     case OMPD_scan:
11429     case OMPD_teams:
11430     case OMPD_target_data:
11431     case OMPD_distribute:
11432     case OMPD_distribute_simd:
11433     case OMPD_distribute_parallel_for:
11434     case OMPD_distribute_parallel_for_simd:
11435     case OMPD_teams_distribute:
11436     case OMPD_teams_distribute_simd:
11437     case OMPD_teams_distribute_parallel_for:
11438     case OMPD_teams_distribute_parallel_for_simd:
11439     case OMPD_declare_simd:
11440     case OMPD_declare_variant:
11441     case OMPD_begin_declare_variant:
11442     case OMPD_end_declare_variant:
11443     case OMPD_declare_target:
11444     case OMPD_end_declare_target:
11445     case OMPD_declare_reduction:
11446     case OMPD_declare_mapper:
11447     case OMPD_taskloop:
11448     case OMPD_taskloop_simd:
11449     case OMPD_master_taskloop:
11450     case OMPD_master_taskloop_simd:
11451     case OMPD_parallel_master_taskloop:
11452     case OMPD_parallel_master_taskloop_simd:
11453     case OMPD_target:
11454     case OMPD_target_simd:
11455     case OMPD_target_teams_distribute:
11456     case OMPD_target_teams_distribute_simd:
11457     case OMPD_target_teams_distribute_parallel_for:
11458     case OMPD_target_teams_distribute_parallel_for_simd:
11459     case OMPD_target_teams:
11460     case OMPD_target_parallel:
11461     case OMPD_target_parallel_for:
11462     case OMPD_target_parallel_for_simd:
11463     case OMPD_requires:
11464     case OMPD_metadirective:
11465     case OMPD_unknown:
11466     default:
11467       llvm_unreachable("Unexpected standalone target data directive.");
11468       break;
11469     }
11470     CGF.EmitRuntimeCall(
11471         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11472         OffloadingArgs);
11473   };
11474 
11475   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11476                           &MapNamesArray](CodeGenFunction &CGF,
11477                                           PrePostActionTy &) {
11478     // Fill up the arrays with all the mapped variables.
11479     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11480 
11481     // Get map clause information.
11482     MappableExprsHandler MEHandler(D, CGF);
11483     MEHandler.generateAllInfo(CombinedInfo);
11484 
11485     TargetDataInfo Info;
11486     // Fill up the arrays and create the arguments.
11487     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11488                          /*IsNonContiguous=*/true);
11489     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11490                              D.hasClausesOfKind<OMPNowaitClause>();
11491     emitOffloadingArraysArgument(
11492         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11493         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11494         {/*ForEndTask=*/false});
11495     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11496     InputInfo.BasePointersArray =
11497         Address(Info.BasePointersArray, CGM.getPointerAlign());
11498     InputInfo.PointersArray =
11499         Address(Info.PointersArray, CGM.getPointerAlign());
11500     InputInfo.SizesArray =
11501         Address(Info.SizesArray, CGM.getPointerAlign());
11502     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11503     MapTypesArray = Info.MapTypesArray;
11504     MapNamesArray = Info.MapNamesArray;
11505     if (RequiresOuterTask)
11506       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11507     else
11508       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11509   };
11510 
11511   if (IfCond) {
11512     emitIfClause(CGF, IfCond, TargetThenGen,
11513                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11514   } else {
11515     RegionCodeGenTy ThenRCG(TargetThenGen);
11516     ThenRCG(CGF);
11517   }
11518 }
11519 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// The manglers in this file print each kind as: LinearWithVarStride ->
  /// 's' (x86) or "ls" (AArch64) followed by StrideOrArg, Linear -> 'l'
  /// followed by the stride when it is not 1, Uniform -> 'u', Vector -> 'v'.
  ///
  /// Note that LinearWithVarStride is the first enumerator, so a
  /// value-initialized `ParamKindTy{}` is LinearWithVarStride, not Vector.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification of the parameter; parameters not mentioned in any clause
    // keep the default, Vector.
    ParamKindTy Kind = Vector;
    // Value printed after the linear marker in the mangled name: the stride
    // for Linear; for LinearWithVarStride presumably the position of the
    // parameter holding the stride -- TODO confirm against the code that
    // fills it in.
    llvm::APSInt StrideOrArg;
    // Alignment printed as 'a<value>' in the mangled name; a zero value means
    // no alignment suffix is emitted.
    llvm::APSInt Alignment;
  };
} // namespace
11530 
11531 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11532                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11533   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11534   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11535   // of that clause. The VLEN value must be power of 2.
11536   // In other case the notion of the function`s "characteristic data type" (CDT)
11537   // is used to compute the vector length.
11538   // CDT is defined in the following order:
11539   //   a) For non-void function, the CDT is the return type.
11540   //   b) If the function has any non-uniform, non-linear parameters, then the
11541   //   CDT is the type of the first such parameter.
11542   //   c) If the CDT determined by a) or b) above is struct, union, or class
11543   //   type which is pass-by-value (except for the type that maps to the
11544   //   built-in complex data type), the characteristic data type is int.
11545   //   d) If none of the above three cases is applicable, the CDT is int.
11546   // The VLEN is then determined based on the CDT and the size of vector
11547   // register of that ISA for which current vector version is generated. The
11548   // VLEN is computed using the formula below:
11549   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11550   // where vector register size specified in section 3.2.1 Registers and the
11551   // Stack Frame of original AMD64 ABI document.
11552   QualType RetType = FD->getReturnType();
11553   if (RetType.isNull())
11554     return 0;
11555   ASTContext &C = FD->getASTContext();
11556   QualType CDT;
11557   if (!RetType.isNull() && !RetType->isVoidType()) {
11558     CDT = RetType;
11559   } else {
11560     unsigned Offset = 0;
11561     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11562       if (ParamAttrs[Offset].Kind == Vector)
11563         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11564       ++Offset;
11565     }
11566     if (CDT.isNull()) {
11567       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11568         if (ParamAttrs[I + Offset].Kind == Vector) {
11569           CDT = FD->getParamDecl(I)->getType();
11570           break;
11571         }
11572       }
11573     }
11574   }
11575   if (CDT.isNull())
11576     CDT = C.IntTy;
11577   CDT = CDT->getCanonicalTypeUnqualified();
11578   if (CDT->isRecordType() || CDT->isUnionType())
11579     CDT = C.IntTy;
11580   return C.getTypeSize(CDT);
11581 }
11582 
11583 static void
11584 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11585                            const llvm::APSInt &VLENVal,
11586                            ArrayRef<ParamAttrTy> ParamAttrs,
11587                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11588   struct ISADataTy {
11589     char ISA;
11590     unsigned VecRegSize;
11591   };
11592   ISADataTy ISAData[] = {
11593       {
11594           'b', 128
11595       }, // SSE
11596       {
11597           'c', 256
11598       }, // AVX
11599       {
11600           'd', 256
11601       }, // AVX2
11602       {
11603           'e', 512
11604       }, // AVX512
11605   };
11606   llvm::SmallVector<char, 2> Masked;
11607   switch (State) {
11608   case OMPDeclareSimdDeclAttr::BS_Undefined:
11609     Masked.push_back('N');
11610     Masked.push_back('M');
11611     break;
11612   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11613     Masked.push_back('N');
11614     break;
11615   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11616     Masked.push_back('M');
11617     break;
11618   }
11619   for (char Mask : Masked) {
11620     for (const ISADataTy &Data : ISAData) {
11621       SmallString<256> Buffer;
11622       llvm::raw_svector_ostream Out(Buffer);
11623       Out << "_ZGV" << Data.ISA << Mask;
11624       if (!VLENVal) {
11625         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11626         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11627         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11628       } else {
11629         Out << VLENVal;
11630       }
11631       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11632         switch (ParamAttr.Kind){
11633         case LinearWithVarStride:
11634           Out << 's' << ParamAttr.StrideOrArg;
11635           break;
11636         case Linear:
11637           Out << 'l';
11638           if (ParamAttr.StrideOrArg != 1)
11639             Out << ParamAttr.StrideOrArg;
11640           break;
11641         case Uniform:
11642           Out << 'u';
11643           break;
11644         case Vector:
11645           Out << 'v';
11646           break;
11647         }
11648         if (!!ParamAttr.Alignment)
11649           Out << 'a' << ParamAttr.Alignment;
11650       }
11651       Out << '_' << Fn->getName();
11652       Fn->addFnAttr(Out.str());
11653     }
11654   }
11655 }
11656 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64" (AAVFABI),
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11662 
11663 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11664 ///
11665 /// TODO: Need to implement the behavior for reference marked with a
11666 /// var or no linear modifiers (1.b in the section). For this, we
11667 /// need to extend ParamKindTy to support the linear modifiers.
11668 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11669   QT = QT.getCanonicalType();
11670 
11671   if (QT->isVoidType())
11672     return false;
11673 
11674   if (Kind == ParamKindTy::Uniform)
11675     return false;
11676 
11677   if (Kind == ParamKindTy::Linear)
11678     return false;
11679 
11680   // TODO: Handle linear references with modifiers
11681 
11682   if (Kind == ParamKindTy::LinearWithVarStride)
11683     return false;
11684 
11685   return true;
11686 }
11687 
11688 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11689 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11690   QT = QT.getCanonicalType();
11691   unsigned Size = C.getTypeSize(QT);
11692 
11693   // Only scalars and complex within 16 bytes wide set PVB to true.
11694   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11695     return false;
11696 
11697   if (QT->isFloatingType())
11698     return true;
11699 
11700   if (QT->isIntegerType())
11701     return true;
11702 
11703   if (QT->isPointerType())
11704     return true;
11705 
11706   // TODO: Add support for complex types (section 3.1.2, item 2).
11707 
11708   return false;
11709 }
11710 
11711 /// Computes the lane size (LS) of a return type or of an input parameter,
11712 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11713 /// TODO: Add support for references, section 3.2.1, item 1.
11714 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11715   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11716     QualType PTy = QT.getCanonicalType()->getPointeeType();
11717     if (getAArch64PBV(PTy, C))
11718       return C.getTypeSize(PTy);
11719   }
11720   if (getAArch64PBV(QT, C))
11721     return C.getTypeSize(QT);
11722 
11723   return C.getTypeSize(C.getUIntPtrType());
11724 }
11725 
11726 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11727 // signature of the scalar function, as defined in 3.2.2 of the
11728 // AAVFABI.
11729 static std::tuple<unsigned, unsigned, bool>
11730 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11731   QualType RetType = FD->getReturnType().getCanonicalType();
11732 
11733   ASTContext &C = FD->getASTContext();
11734 
11735   bool OutputBecomesInput = false;
11736 
11737   llvm::SmallVector<unsigned, 8> Sizes;
11738   if (!RetType->isVoidType()) {
11739     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11740     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11741       OutputBecomesInput = true;
11742   }
11743   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11744     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11745     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11746   }
11747 
11748   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11749   // The LS of a function parameter / return value can only be a power
11750   // of 2, starting from 8 bits, up to 128.
11751   assert(llvm::all_of(Sizes,
11752                       [](unsigned Size) {
11753                         return Size == 8 || Size == 16 || Size == 32 ||
11754                                Size == 64 || Size == 128;
11755                       }) &&
11756          "Invalid size");
11757 
11758   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11759                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11760                          OutputBecomesInput);
11761 }
11762 
11763 /// Mangle the parameter part of the vector function name according to
11764 /// their OpenMP classification. The mangling function is defined in
11765 /// section 3.5 of the AAVFABI.
11766 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11767   SmallString<256> Buffer;
11768   llvm::raw_svector_ostream Out(Buffer);
11769   for (const auto &ParamAttr : ParamAttrs) {
11770     switch (ParamAttr.Kind) {
11771     case LinearWithVarStride:
11772       Out << "ls" << ParamAttr.StrideOrArg;
11773       break;
11774     case Linear:
11775       Out << 'l';
11776       // Don't print the step value if it is not present or if it is
11777       // equal to 1.
11778       if (ParamAttr.StrideOrArg != 1)
11779         Out << ParamAttr.StrideOrArg;
11780       break;
11781     case Uniform:
11782       Out << 'u';
11783       break;
11784     case Vector:
11785       Out << 'v';
11786       break;
11787     }
11788 
11789     if (!!ParamAttr.Alignment)
11790       Out << 'a' << ParamAttr.Alignment;
11791   }
11792 
11793   return std::string(Out.str());
11794 }
11795 
11796 // Function used to add the attribute. The parameter `VLEN` is
11797 // templated to allow the use of "x" when targeting scalable functions
11798 // for SVE.
11799 template <typename T>
11800 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11801                                  char ISA, StringRef ParSeq,
11802                                  StringRef MangledName, bool OutputBecomesInput,
11803                                  llvm::Function *Fn) {
11804   SmallString<256> Buffer;
11805   llvm::raw_svector_ostream Out(Buffer);
11806   Out << Prefix << ISA << LMask << VLEN;
11807   if (OutputBecomesInput)
11808     Out << "v";
11809   Out << ParSeq << "_" << MangledName;
11810   Fn->addFnAttr(Out.str());
11811 }
11812 
11813 // Helper function to generate the Advanced SIMD names depending on
11814 // the value of the NDS when simdlen is not present.
11815 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11816                                       StringRef Prefix, char ISA,
11817                                       StringRef ParSeq, StringRef MangledName,
11818                                       bool OutputBecomesInput,
11819                                       llvm::Function *Fn) {
11820   switch (NDS) {
11821   case 8:
11822     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11823                          OutputBecomesInput, Fn);
11824     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11825                          OutputBecomesInput, Fn);
11826     break;
11827   case 16:
11828     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11829                          OutputBecomesInput, Fn);
11830     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11831                          OutputBecomesInput, Fn);
11832     break;
11833   case 32:
11834     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11835                          OutputBecomesInput, Fn);
11836     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11837                          OutputBecomesInput, Fn);
11838     break;
11839   case 64:
11840   case 128:
11841     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11842                          OutputBecomesInput, Fn);
11843     break;
11844   default:
11845     llvm_unreachable("Scalar type is too wide.");
11846   }
11847 }
11848 
11849 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11850 static void emitAArch64DeclareSimdFunction(
11851     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11852     ArrayRef<ParamAttrTy> ParamAttrs,
11853     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11854     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11855 
11856   // Get basic data for building the vector signature.
11857   const auto Data = getNDSWDS(FD, ParamAttrs);
11858   const unsigned NDS = std::get<0>(Data);
11859   const unsigned WDS = std::get<1>(Data);
11860   const bool OutputBecomesInput = std::get<2>(Data);
11861 
11862   // Check the values provided via `simdlen` by the user.
11863   // 1. A `simdlen(1)` doesn't produce vector signatures,
11864   if (UserVLEN == 1) {
11865     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11866         DiagnosticsEngine::Warning,
11867         "The clause simdlen(1) has no effect when targeting aarch64.");
11868     CGM.getDiags().Report(SLoc, DiagID);
11869     return;
11870   }
11871 
11872   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11873   // Advanced SIMD output.
11874   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11875     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11876         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11877                                     "power of 2 when targeting Advanced SIMD.");
11878     CGM.getDiags().Report(SLoc, DiagID);
11879     return;
11880   }
11881 
11882   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11883   // limits.
11884   if (ISA == 's' && UserVLEN != 0) {
11885     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11886       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11887           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11888                                       "lanes in the architectural constraints "
11889                                       "for SVE (min is 128-bit, max is "
11890                                       "2048-bit, by steps of 128-bit)");
11891       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11892       return;
11893     }
11894   }
11895 
11896   // Sort out parameter sequence.
11897   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11898   StringRef Prefix = "_ZGV";
11899   // Generate simdlen from user input (if any).
11900   if (UserVLEN) {
11901     if (ISA == 's') {
11902       // SVE generates only a masked function.
11903       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11904                            OutputBecomesInput, Fn);
11905     } else {
11906       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11907       // Advanced SIMD generates one or two functions, depending on
11908       // the `[not]inbranch` clause.
11909       switch (State) {
11910       case OMPDeclareSimdDeclAttr::BS_Undefined:
11911         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11912                              OutputBecomesInput, Fn);
11913         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11914                              OutputBecomesInput, Fn);
11915         break;
11916       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11917         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11918                              OutputBecomesInput, Fn);
11919         break;
11920       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11921         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11922                              OutputBecomesInput, Fn);
11923         break;
11924       }
11925     }
11926   } else {
11927     // If no user simdlen is provided, follow the AAVFABI rules for
11928     // generating the vector length.
11929     if (ISA == 's') {
11930       // SVE, section 3.4.1, item 1.
11931       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11932                            OutputBecomesInput, Fn);
11933     } else {
11934       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11935       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11936       // two vector names depending on the use of the clause
11937       // `[not]inbranch`.
11938       switch (State) {
11939       case OMPDeclareSimdDeclAttr::BS_Undefined:
11940         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11941                                   OutputBecomesInput, Fn);
11942         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11943                                   OutputBecomesInput, Fn);
11944         break;
11945       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11946         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11947                                   OutputBecomesInput, Fn);
11948         break;
11949       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11950         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11951                                   OutputBecomesInput, Fn);
11952         break;
11953       }
11954     }
11955   }
11956 }
11957 
11958 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11959                                               llvm::Function *Fn) {
11960   ASTContext &C = CGM.getContext();
11961   FD = FD->getMostRecentDecl();
11962   // Map params to their positions in function decl.
11963   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11964   if (isa<CXXMethodDecl>(FD))
11965     ParamPositions.try_emplace(FD, 0);
11966   unsigned ParamPos = ParamPositions.size();
11967   for (const ParmVarDecl *P : FD->parameters()) {
11968     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11969     ++ParamPos;
11970   }
11971   while (FD) {
11972     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11973       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11974       // Mark uniform parameters.
11975       for (const Expr *E : Attr->uniforms()) {
11976         E = E->IgnoreParenImpCasts();
11977         unsigned Pos;
11978         if (isa<CXXThisExpr>(E)) {
11979           Pos = ParamPositions[FD];
11980         } else {
11981           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11982                                 ->getCanonicalDecl();
11983           Pos = ParamPositions[PVD];
11984         }
11985         ParamAttrs[Pos].Kind = Uniform;
11986       }
11987       // Get alignment info.
11988       auto NI = Attr->alignments_begin();
11989       for (const Expr *E : Attr->aligneds()) {
11990         E = E->IgnoreParenImpCasts();
11991         unsigned Pos;
11992         QualType ParmTy;
11993         if (isa<CXXThisExpr>(E)) {
11994           Pos = ParamPositions[FD];
11995           ParmTy = E->getType();
11996         } else {
11997           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11998                                 ->getCanonicalDecl();
11999           Pos = ParamPositions[PVD];
12000           ParmTy = PVD->getType();
12001         }
12002         ParamAttrs[Pos].Alignment =
12003             (*NI)
12004                 ? (*NI)->EvaluateKnownConstInt(C)
12005                 : llvm::APSInt::getUnsigned(
12006                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12007                           .getQuantity());
12008         ++NI;
12009       }
12010       // Mark linear parameters.
12011       auto SI = Attr->steps_begin();
12012       auto MI = Attr->modifiers_begin();
12013       for (const Expr *E : Attr->linears()) {
12014         E = E->IgnoreParenImpCasts();
12015         unsigned Pos;
12016         // Rescaling factor needed to compute the linear parameter
12017         // value in the mangled name.
12018         unsigned PtrRescalingFactor = 1;
12019         if (isa<CXXThisExpr>(E)) {
12020           Pos = ParamPositions[FD];
12021         } else {
12022           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12023                                 ->getCanonicalDecl();
12024           Pos = ParamPositions[PVD];
12025           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12026             PtrRescalingFactor = CGM.getContext()
12027                                      .getTypeSizeInChars(P->getPointeeType())
12028                                      .getQuantity();
12029         }
12030         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12031         ParamAttr.Kind = Linear;
12032         // Assuming a stride of 1, for `linear` without modifiers.
12033         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12034         if (*SI) {
12035           Expr::EvalResult Result;
12036           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12037             if (const auto *DRE =
12038                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12039               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12040                 ParamAttr.Kind = LinearWithVarStride;
12041                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12042                     ParamPositions[StridePVD->getCanonicalDecl()]);
12043               }
12044             }
12045           } else {
12046             ParamAttr.StrideOrArg = Result.Val.getInt();
12047           }
12048         }
12049         // If we are using a linear clause on a pointer, we need to
12050         // rescale the value of linear_step with the byte size of the
12051         // pointee type.
12052         if (Linear == ParamAttr.Kind)
12053           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12054         ++SI;
12055         ++MI;
12056       }
12057       llvm::APSInt VLENVal;
12058       SourceLocation ExprLoc;
12059       const Expr *VLENExpr = Attr->getSimdlen();
12060       if (VLENExpr) {
12061         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12062         ExprLoc = VLENExpr->getExprLoc();
12063       }
12064       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12065       if (CGM.getTriple().isX86()) {
12066         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12067       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12068         unsigned VLEN = VLENVal.getExtValue();
12069         StringRef MangledName = Fn->getName();
12070         if (CGM.getTarget().hasFeature("sve"))
12071           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12072                                          MangledName, 's', 128, Fn, ExprLoc);
12073         if (CGM.getTarget().hasFeature("neon"))
12074           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12075                                          MangledName, 'n', 128, Fn, ExprLoc);
12076       }
12077     }
12078     FD = FD->getPreviousDecl();
12079   }
12080 }
12081 
12082 namespace {
12083 /// Cleanup action for doacross support.
12084 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12085 public:
12086   static const int DoacrossFinArgs = 2;
12087 
12088 private:
12089   llvm::FunctionCallee RTLFn;
12090   llvm::Value *Args[DoacrossFinArgs];
12091 
12092 public:
12093   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12094                     ArrayRef<llvm::Value *> CallArgs)
12095       : RTLFn(RTLFn) {
12096     assert(CallArgs.size() == DoacrossFinArgs);
12097     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12098   }
12099   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12100     if (!CGF.HaveInsertPoint())
12101       return;
12102     CGF.EmitRuntimeCall(RTLFn, Args);
12103   }
12104 };
12105 } // namespace
12106 
12107 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12108                                        const OMPLoopDirective &D,
12109                                        ArrayRef<Expr *> NumIterations) {
12110   if (!CGF.HaveInsertPoint())
12111     return;
12112 
12113   ASTContext &C = CGM.getContext();
12114   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12115   RecordDecl *RD;
12116   if (KmpDimTy.isNull()) {
12117     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12118     //  kmp_int64 lo; // lower
12119     //  kmp_int64 up; // upper
12120     //  kmp_int64 st; // stride
12121     // };
12122     RD = C.buildImplicitRecord("kmp_dim");
12123     RD->startDefinition();
12124     addFieldToRecordDecl(C, RD, Int64Ty);
12125     addFieldToRecordDecl(C, RD, Int64Ty);
12126     addFieldToRecordDecl(C, RD, Int64Ty);
12127     RD->completeDefinition();
12128     KmpDimTy = C.getRecordType(RD);
12129   } else {
12130     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12131   }
12132   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12133   QualType ArrayTy =
12134       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12135 
12136   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12137   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12138   enum { LowerFD = 0, UpperFD, StrideFD };
12139   // Fill dims with data.
12140   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12141     LValue DimsLVal = CGF.MakeAddrLValue(
12142         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12143     // dims.upper = num_iterations;
12144     LValue UpperLVal = CGF.EmitLValueForField(
12145         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12146     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12147         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12148         Int64Ty, NumIterations[I]->getExprLoc());
12149     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12150     // dims.stride = 1;
12151     LValue StrideLVal = CGF.EmitLValueForField(
12152         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12153     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12154                           StrideLVal);
12155   }
12156 
12157   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12158   // kmp_int32 num_dims, struct kmp_dim * dims);
12159   llvm::Value *Args[] = {
12160       emitUpdateLocation(CGF, D.getBeginLoc()),
12161       getThreadID(CGF, D.getBeginLoc()),
12162       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12163       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12164           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12165           CGM.VoidPtrTy)};
12166 
12167   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12168       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12169   CGF.EmitRuntimeCall(RTLFn, Args);
12170   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12171       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12172   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12173       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12174   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12175                                              llvm::makeArrayRef(FiniArgs));
12176 }
12177 
12178 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12179                                           const OMPDependClause *C) {
12180   QualType Int64Ty =
12181       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12182   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12183   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12184       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12185   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12186   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12187     const Expr *CounterVal = C->getLoopData(I);
12188     assert(CounterVal);
12189     llvm::Value *CntVal = CGF.EmitScalarConversion(
12190         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12191         CounterVal->getExprLoc());
12192     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12193                           /*Volatile=*/false, Int64Ty);
12194   }
12195   llvm::Value *Args[] = {
12196       emitUpdateLocation(CGF, C->getBeginLoc()),
12197       getThreadID(CGF, C->getBeginLoc()),
12198       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12199   llvm::FunctionCallee RTLFn;
12200   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12201     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12202                                                   OMPRTL___kmpc_doacross_post);
12203   } else {
12204     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12205     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12206                                                   OMPRTL___kmpc_doacross_wait);
12207   }
12208   CGF.EmitRuntimeCall(RTLFn, Args);
12209 }
12210 
12211 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12212                                llvm::FunctionCallee Callee,
12213                                ArrayRef<llvm::Value *> Args) const {
12214   assert(Loc.isValid() && "Outlined function call location must be valid.");
12215   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12216 
12217   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12218     if (Fn->doesNotThrow()) {
12219       CGF.EmitNounwindRuntimeCall(Fn, Args);
12220       return;
12221     }
12222   }
12223   CGF.EmitRuntimeCall(Callee, Args);
12224 }
12225 
/// Emits a call to the outlined function \p OutlinedFn with \p Args.
///
/// This implementation simply forwards all of its arguments to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12231 
12232 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12233   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12234     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12235       HasEmittedDeclareTargetRegion = true;
12236 }
12237 
/// Returns the address of the parameter \p NativeParam in \p CGF.
///
/// This implementation returns the address of the local variable for
/// \p NativeParam and does not use \p TargetParam.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12243 
12244 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12245                                                    const VarDecl *VD) {
12246   if (!VD)
12247     return Address::invalid();
12248   Address UntiedAddr = Address::invalid();
12249   Address UntiedRealAddr = Address::invalid();
12250   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12251   if (It != FunctionToUntiedTaskStackMap.end()) {
12252     const UntiedLocalVarsAddressesMap &UntiedData =
12253         UntiedLocalVarsStack[It->second];
12254     auto I = UntiedData.find(VD);
12255     if (I != UntiedData.end()) {
12256       UntiedAddr = I->second.first;
12257       UntiedRealAddr = I->second.second;
12258     }
12259   }
12260   const VarDecl *CVD = VD->getCanonicalDecl();
12261   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12262     // Use the default allocation.
12263     if (!isAllocatableDecl(VD))
12264       return UntiedAddr;
12265     llvm::Value *Size;
12266     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12267     if (CVD->getType()->isVariablyModifiedType()) {
12268       Size = CGF.getTypeSize(CVD->getType());
12269       // Align the size: ((size + align - 1) / align) * align
12270       Size = CGF.Builder.CreateNUWAdd(
12271           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12272       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12273       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12274     } else {
12275       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12276       Size = CGM.getSize(Sz.alignTo(Align));
12277     }
12278     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12279     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12280     assert(AA->getAllocator() &&
12281            "Expected allocator expression for non-default allocator.");
12282     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
12283     // According to the standard, the original allocator type is a enum
12284     // (integer). Convert to pointer type, if required.
12285     Allocator = CGF.EmitScalarConversion(
12286         Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
12287         AA->getAllocator()->getExprLoc());
12288     llvm::Value *Args[] = {ThreadID, Size, Allocator};
12289 
12290     llvm::Value *Addr =
12291         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
12292                                 CGM.getModule(), OMPRTL___kmpc_alloc),
12293                             Args, getName({CVD->getName(), ".void.addr"}));
12294     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12295         CGM.getModule(), OMPRTL___kmpc_free);
12296     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12297     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12298         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12299     if (UntiedAddr.isValid())
12300       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12301 
12302     // Cleanup action for allocate support.
12303     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12304       llvm::FunctionCallee RTLFn;
12305       SourceLocation::UIntTy LocEncoding;
12306       Address Addr;
12307       const Expr *Allocator;
12308 
12309     public:
12310       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12311                            SourceLocation::UIntTy LocEncoding, Address Addr,
12312                            const Expr *Allocator)
12313           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12314             Allocator(Allocator) {}
12315       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12316         if (!CGF.HaveInsertPoint())
12317           return;
12318         llvm::Value *Args[3];
12319         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12320             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12321         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12322             Addr.getPointer(), CGF.VoidPtrTy);
12323         llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
12324         // According to the standard, the original allocator type is a enum
12325         // (integer). Convert to pointer type, if required.
12326         AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12327                                             CGF.getContext().VoidPtrTy,
12328                                             Allocator->getExprLoc());
12329         Args[2] = AllocVal;
12330 
12331         CGF.EmitRuntimeCall(RTLFn, Args);
12332       }
12333     };
12334     Address VDAddr =
12335         UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
12336     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12337         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12338         VDAddr, AA->getAllocator());
12339     if (UntiedRealAddr.isValid())
12340       if (auto *Region =
12341               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12342         Region->emitUntiedSwitch(CGF);
12343     return VDAddr;
12344   }
12345   return UntiedAddr;
12346 }
12347 
12348 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12349                                              const VarDecl *VD) const {
12350   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12351   if (It == FunctionToUntiedTaskStackMap.end())
12352     return false;
12353   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12354 }
12355 
12356 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12357     CodeGenModule &CGM, const OMPLoopDirective &S)
12358     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12359   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12360   if (!NeedToPush)
12361     return;
12362   NontemporalDeclsSet &DS =
12363       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12364   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12365     for (const Stmt *Ref : C->private_refs()) {
12366       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12367       const ValueDecl *VD;
12368       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12369         VD = DRE->getDecl();
12370       } else {
12371         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12372         assert((ME->isImplicitCXXThis() ||
12373                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12374                "Expected member of current class.");
12375         VD = ME->getMemberDecl();
12376       }
12377       DS.insert(VD);
12378     }
12379   }
12380 }
12381 
12382 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12383   if (!NeedToPush)
12384     return;
12385   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12386 }
12387 
12388 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12389     CodeGenFunction &CGF,
12390     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12391                           std::pair<Address, Address>> &LocalVars)
12392     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12393   if (!NeedToPush)
12394     return;
12395   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12396       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12397   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12398 }
12399 
12400 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12401   if (!NeedToPush)
12402     return;
12403   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12404 }
12405 
12406 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12407   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12408 
12409   return llvm::any_of(
12410       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12411       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12412 }
12413 
12414 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12415     const OMPExecutableDirective &S,
12416     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12417     const {
12418   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12419   // Vars in target/task regions must be excluded completely.
12420   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12421       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12422     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12423     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12424     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12425     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12426       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12427         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12428     }
12429   }
12430   // Exclude vars in private clauses.
12431   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12432     for (const Expr *Ref : C->varlists()) {
12433       if (!Ref->getType()->isScalarType())
12434         continue;
12435       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12436       if (!DRE)
12437         continue;
12438       NeedToCheckForLPCs.insert(DRE->getDecl());
12439     }
12440   }
12441   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12442     for (const Expr *Ref : C->varlists()) {
12443       if (!Ref->getType()->isScalarType())
12444         continue;
12445       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12446       if (!DRE)
12447         continue;
12448       NeedToCheckForLPCs.insert(DRE->getDecl());
12449     }
12450   }
12451   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12452     for (const Expr *Ref : C->varlists()) {
12453       if (!Ref->getType()->isScalarType())
12454         continue;
12455       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12456       if (!DRE)
12457         continue;
12458       NeedToCheckForLPCs.insert(DRE->getDecl());
12459     }
12460   }
12461   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12462     for (const Expr *Ref : C->varlists()) {
12463       if (!Ref->getType()->isScalarType())
12464         continue;
12465       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12466       if (!DRE)
12467         continue;
12468       NeedToCheckForLPCs.insert(DRE->getDecl());
12469     }
12470   }
12471   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12472     for (const Expr *Ref : C->varlists()) {
12473       if (!Ref->getType()->isScalarType())
12474         continue;
12475       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12476       if (!DRE)
12477         continue;
12478       NeedToCheckForLPCs.insert(DRE->getDecl());
12479     }
12480   }
12481   for (const Decl *VD : NeedToCheckForLPCs) {
12482     for (const LastprivateConditionalData &Data :
12483          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12484       if (Data.DeclToUniqueName.count(VD) > 0) {
12485         if (!Data.Disabled)
12486           NeedToAddForLPCsAsDisabled.insert(VD);
12487         break;
12488       }
12489     }
12490   }
12491 }
12492 
12493 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12494     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12495     : CGM(CGF.CGM),
12496       Action((CGM.getLangOpts().OpenMP >= 50 &&
12497               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12498                            [](const OMPLastprivateClause *C) {
12499                              return C->getKind() ==
12500                                     OMPC_LASTPRIVATE_conditional;
12501                            }))
12502                  ? ActionToDo::PushAsLastprivateConditional
12503                  : ActionToDo::DoNotPush) {
12504   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12505   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12506     return;
12507   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12508          "Expected a push action.");
12509   LastprivateConditionalData &Data =
12510       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12511   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12512     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12513       continue;
12514 
12515     for (const Expr *Ref : C->varlists()) {
12516       Data.DeclToUniqueName.insert(std::make_pair(
12517           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12518           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12519     }
12520   }
12521   Data.IVLVal = IVLVal;
12522   Data.Fn = CGF.CurFn;
12523 }
12524 
12525 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12526     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12527     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12528   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12529   if (CGM.getLangOpts().OpenMP < 50)
12530     return;
12531   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12532   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12533   if (!NeedToAddForLPCsAsDisabled.empty()) {
12534     Action = ActionToDo::DisableLastprivateConditional;
12535     LastprivateConditionalData &Data =
12536         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12537     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12538       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12539     Data.Fn = CGF.CurFn;
12540     Data.Disabled = true;
12541   }
12542 }
12543 
/// Returns an RAII object built with the (CGF, S) constructor, i.e. the one
/// that may push a disabled LastprivateConditionalStack entry for \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12549 
12550 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12551   if (CGM.getLangOpts().OpenMP < 50)
12552     return;
12553   if (Action == ActionToDo::DisableLastprivateConditional) {
12554     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12555            "Expected list of disabled private vars.");
12556     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12557   }
12558   if (Action == ActionToDo::PushAsLastprivateConditional) {
12559     assert(
12560         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12561         "Expected list of lastprivate conditional vars.");
12562     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12563   }
12564 }
12565 
12566 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12567                                                         const VarDecl *VD) {
12568   ASTContext &C = CGM.getContext();
12569   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12570   if (I == LastprivateConditionalToTypes.end())
12571     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12572   QualType NewType;
12573   const FieldDecl *VDField;
12574   const FieldDecl *FiredField;
12575   LValue BaseLVal;
12576   auto VI = I->getSecond().find(VD);
12577   if (VI == I->getSecond().end()) {
12578     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12579     RD->startDefinition();
12580     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12581     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12582     RD->completeDefinition();
12583     NewType = C.getRecordType(RD);
12584     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12585     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12586     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12587   } else {
12588     NewType = std::get<0>(VI->getSecond());
12589     VDField = std::get<1>(VI->getSecond());
12590     FiredField = std::get<2>(VI->getSecond());
12591     BaseLVal = std::get<3>(VI->getSecond());
12592   }
12593   LValue FiredLVal =
12594       CGF.EmitLValueForField(BaseLVal, FiredField);
12595   CGF.EmitStoreOfScalar(
12596       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12597       FiredLVal);
12598   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12599 }
12600 
12601 namespace {
12602 /// Checks if the lastprivate conditional variable is referenced in LHS.
12603 class LastprivateConditionalRefChecker final
12604     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12605   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12606   const Expr *FoundE = nullptr;
12607   const Decl *FoundD = nullptr;
12608   StringRef UniqueDeclName;
12609   LValue IVLVal;
12610   llvm::Function *FoundFn = nullptr;
12611   SourceLocation Loc;
12612 
12613 public:
12614   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12615     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12616          llvm::reverse(LPM)) {
12617       auto It = D.DeclToUniqueName.find(E->getDecl());
12618       if (It == D.DeclToUniqueName.end())
12619         continue;
12620       if (D.Disabled)
12621         return false;
12622       FoundE = E;
12623       FoundD = E->getDecl()->getCanonicalDecl();
12624       UniqueDeclName = It->second;
12625       IVLVal = D.IVLVal;
12626       FoundFn = D.Fn;
12627       break;
12628     }
12629     return FoundE == E;
12630   }
12631   bool VisitMemberExpr(const MemberExpr *E) {
12632     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12633       return false;
12634     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12635          llvm::reverse(LPM)) {
12636       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12637       if (It == D.DeclToUniqueName.end())
12638         continue;
12639       if (D.Disabled)
12640         return false;
12641       FoundE = E;
12642       FoundD = E->getMemberDecl()->getCanonicalDecl();
12643       UniqueDeclName = It->second;
12644       IVLVal = D.IVLVal;
12645       FoundFn = D.Fn;
12646       break;
12647     }
12648     return FoundE == E;
12649   }
12650   bool VisitStmt(const Stmt *S) {
12651     for (const Stmt *Child : S->children()) {
12652       if (!Child)
12653         continue;
12654       if (const auto *E = dyn_cast<Expr>(Child))
12655         if (!E->isGLValue())
12656           continue;
12657       if (Visit(Child))
12658         return true;
12659     }
12660     return false;
12661   }
12662   explicit LastprivateConditionalRefChecker(
12663       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12664       : LPM(LPM) {}
12665   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12666   getFoundData() const {
12667     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12668   }
12669 };
12670 } // namespace
12671 
12672 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12673                                                        LValue IVLVal,
12674                                                        StringRef UniqueDeclName,
12675                                                        LValue LVal,
12676                                                        SourceLocation Loc) {
12677   // Last updated loop counter for the lastprivate conditional var.
12678   // int<xx> last_iv = 0;
12679   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12680   llvm::Constant *LastIV =
12681       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12682   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12683       IVLVal.getAlignment().getAsAlign());
12684   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12685 
12686   // Last value of the lastprivate conditional.
12687   // decltype(priv_a) last_a;
12688   llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12689       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12690   Last->setAlignment(LVal.getAlignment().getAsAlign());
12691   LValue LastLVal = CGF.MakeAddrLValue(
12692       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12693 
12694   // Global loop counter. Required to handle inner parallel-for regions.
12695   // iv
12696   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12697 
12698   // #pragma omp critical(a)
12699   // if (last_iv <= iv) {
12700   //   last_iv = iv;
12701   //   last_a = priv_a;
12702   // }
12703   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12704                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12705     Action.Enter(CGF);
12706     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12707     // (last_iv <= iv) ? Check if the variable is updated and store new
12708     // value in global var.
12709     llvm::Value *CmpRes;
12710     if (IVLVal.getType()->isSignedIntegerType()) {
12711       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12712     } else {
12713       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12714              "Loop iteration variable must be integer.");
12715       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12716     }
12717     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12718     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12719     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12720     // {
12721     CGF.EmitBlock(ThenBB);
12722 
12723     //   last_iv = iv;
12724     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12725 
12726     //   last_a = priv_a;
12727     switch (CGF.getEvaluationKind(LVal.getType())) {
12728     case TEK_Scalar: {
12729       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12730       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12731       break;
12732     }
12733     case TEK_Complex: {
12734       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12735       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12736       break;
12737     }
12738     case TEK_Aggregate:
12739       llvm_unreachable(
12740           "Aggregates are not supported in lastprivate conditional.");
12741     }
12742     // }
12743     CGF.EmitBranch(ExitBB);
12744     // There is no need to emit line number for unconditional branch.
12745     (void)ApplyDebugLocation::CreateEmpty(CGF);
12746     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12747   };
12748 
12749   if (CGM.getLangOpts().OpenMPSimd) {
12750     // Do not emit as a critical region as no parallel region could be emitted.
12751     RegionCodeGenTy ThenRCG(CodeGen);
12752     ThenRCG(CGF);
12753   } else {
12754     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12755   }
12756 }
12757 
12758 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12759                                                          const Expr *LHS) {
12760   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12761     return;
12762   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12763   if (!Checker.Visit(LHS))
12764     return;
12765   const Expr *FoundE;
12766   const Decl *FoundD;
12767   StringRef UniqueDeclName;
12768   LValue IVLVal;
12769   llvm::Function *FoundFn;
12770   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12771       Checker.getFoundData();
12772   if (FoundFn != CGF.CurFn) {
12773     // Special codegen for inner parallel regions.
12774     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12775     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12776     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12777            "Lastprivate conditional is not found in outer region.");
12778     QualType StructTy = std::get<0>(It->getSecond());
12779     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12780     LValue PrivLVal = CGF.EmitLValue(FoundE);
12781     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12782         PrivLVal.getAddress(CGF),
12783         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12784     LValue BaseLVal =
12785         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12786     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12787     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12788                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12789                         FiredLVal, llvm::AtomicOrdering::Unordered,
12790                         /*IsVolatile=*/true, /*isInit=*/false);
12791     return;
12792   }
12793 
12794   // Private address of the lastprivate conditional in the current context.
12795   // priv_a
12796   LValue LVal = CGF.EmitLValue(FoundE);
12797   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12798                                    FoundE->getExprLoc());
12799 }
12800 
12801 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12802     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12803     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12804   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12805     return;
12806   auto Range = llvm::reverse(LastprivateConditionalStack);
12807   auto It = llvm::find_if(
12808       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12809   if (It == Range.end() || It->Fn != CGF.CurFn)
12810     return;
12811   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12812   assert(LPCI != LastprivateConditionalToTypes.end() &&
12813          "Lastprivates must be registered already.");
12814   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12815   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12816   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12817   for (const auto &Pair : It->DeclToUniqueName) {
12818     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12819     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12820       continue;
12821     auto I = LPCI->getSecond().find(Pair.first);
12822     assert(I != LPCI->getSecond().end() &&
12823            "Lastprivate must be rehistered already.");
12824     // bool Cmp = priv_a.Fired != 0;
12825     LValue BaseLVal = std::get<3>(I->getSecond());
12826     LValue FiredLVal =
12827         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12828     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12829     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12830     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12831     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12832     // if (Cmp) {
12833     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12834     CGF.EmitBlock(ThenBB);
12835     Address Addr = CGF.GetAddrOfLocalVar(VD);
12836     LValue LVal;
12837     if (VD->getType()->isReferenceType())
12838       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12839                                            AlignmentSource::Decl);
12840     else
12841       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12842                                 AlignmentSource::Decl);
12843     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12844                                      D.getBeginLoc());
12845     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12846     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12847     // }
12848   }
12849 }
12850 
12851 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12852     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12853     SourceLocation Loc) {
12854   if (CGF.getLangOpts().OpenMP < 50)
12855     return;
12856   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12857   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12858          "Unknown lastprivate conditional variable.");
12859   StringRef UniqueName = It->second;
12860   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12861   // The variable was not updated in the region - exit.
12862   if (!GV)
12863     return;
12864   LValue LPLVal = CGF.MakeAddrLValue(
12865       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12866       PrivLVal.getType().getNonReferenceType());
12867   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12868   CGF.EmitStoreOfScalar(Res, PrivLVal);
12869 }
12870 
/// emitParallelOutlinedFunction is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable. All parameters are unused.
12871 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12872     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12873     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12874   llvm_unreachable("Not supported in SIMD-only mode");
12875 }
12876 
/// emitTeamsOutlinedFunction is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable. All parameters are unused.
12877 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12878     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12879     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12880   llvm_unreachable("Not supported in SIMD-only mode");
12881 }
12882 
/// emitTaskOutlinedFunction is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable. No parameter is read and
/// \p NumberOfParts is never written.
12883 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12884     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12885     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12886     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12887     bool Tied, unsigned &NumberOfParts) {
12888   llvm_unreachable("Not supported in SIMD-only mode");
12889 }
12890 
/// emitParallelCall is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
12891 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12892                                            SourceLocation Loc,
12893                                            llvm::Function *OutlinedFn,
12894                                            ArrayRef<llvm::Value *> CapturedVars,
12895                                            const Expr *IfCond,
12896                                            llvm::Value *NumThreads) {
12897   llvm_unreachable("Not supported in SIMD-only mode");
12898 }
12899 
/// emitCriticalRegion is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. \p CriticalOpGen is never invoked.
12900 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12901     CodeGenFunction &CGF, StringRef CriticalName,
12902     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12903     const Expr *Hint) {
12904   llvm_unreachable("Not supported in SIMD-only mode");
12905 }
12906 
/// emitMasterRegion is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. \p MasterOpGen is never invoked.
12907 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12908                                            const RegionCodeGenTy &MasterOpGen,
12909                                            SourceLocation Loc) {
12910   llvm_unreachable("Not supported in SIMD-only mode");
12911 }
12912 
/// emitMaskedRegion is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. The region generator (named \p MasterOpGen in this
/// definition) is never invoked and \p Filter is unused.
12913 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12914                                            const RegionCodeGenTy &MasterOpGen,
12915                                            SourceLocation Loc,
12916                                            const Expr *Filter) {
12917   llvm_unreachable("Not supported in SIMD-only mode");
12918 }
12919 
/// emitTaskyieldCall is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable.
12920 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12921                                             SourceLocation Loc) {
12922   llvm_unreachable("Not supported in SIMD-only mode");
12923 }
12924 
/// emitTaskgroupRegion is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. \p TaskgroupOpGen is never invoked.
12925 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12926     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12927     SourceLocation Loc) {
12928   llvm_unreachable("Not supported in SIMD-only mode");
12929 }
12930 
/// emitSingleRegion is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. \p SingleOpGen is never invoked and the expression
/// lists are unused.
12931 void CGOpenMPSIMDRuntime::emitSingleRegion(
12932     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12933     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12934     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12935     ArrayRef<const Expr *> AssignmentOps) {
12936   llvm_unreachable("Not supported in SIMD-only mode");
12937 }
12938 
/// emitOrderedRegion is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. \p OrderedOpGen is never invoked.
12939 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12940                                             const RegionCodeGenTy &OrderedOpGen,
12941                                             SourceLocation Loc,
12942                                             bool IsThreads) {
12943   llvm_unreachable("Not supported in SIMD-only mode");
12944 }
12945 
/// emitBarrierCall is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
12946 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12947                                           SourceLocation Loc,
12948                                           OpenMPDirectiveKind Kind,
12949                                           bool EmitChecks,
12950                                           bool ForceSimpleCall) {
12951   llvm_unreachable("Not supported in SIMD-only mode");
12952 }
12953 
/// emitForDispatchInit is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
12954 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12955     CodeGenFunction &CGF, SourceLocation Loc,
12956     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12957     bool Ordered, const DispatchRTInput &DispatchValues) {
12958   llvm_unreachable("Not supported in SIMD-only mode");
12959 }
12960 
/// emitForStaticInit is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
12961 void CGOpenMPSIMDRuntime::emitForStaticInit(
12962     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12963     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12964   llvm_unreachable("Not supported in SIMD-only mode");
12965 }
12966 
/// emitDistributeStaticInit is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable. All parameters are unused.
12967 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12968     CodeGenFunction &CGF, SourceLocation Loc,
12969     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12970   llvm_unreachable("Not supported in SIMD-only mode");
12971 }
12972 
/// emitForOrderedIterationEnd is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable. All parameters are unused.
12973 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12974                                                      SourceLocation Loc,
12975                                                      unsigned IVSize,
12976                                                      bool IVSigned) {
12977   llvm_unreachable("Not supported in SIMD-only mode");
12978 }
12979 
/// emitForStaticFinish is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
12980 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12981                                               SourceLocation Loc,
12982                                               OpenMPDirectiveKind DKind) {
12983   llvm_unreachable("Not supported in SIMD-only mode");
12984 }
12985 
/// emitForNext is not supported by the SIMD-only runtime; reaching it trips
/// llvm_unreachable, so no value is ever returned.
12986 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12987                                               SourceLocation Loc,
12988                                               unsigned IVSize, bool IVSigned,
12989                                               Address IL, Address LB,
12990                                               Address UB, Address ST) {
12991   llvm_unreachable("Not supported in SIMD-only mode");
12992 }
12993 
/// emitNumThreadsClause is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
12994 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12995                                                llvm::Value *NumThreads,
12996                                                SourceLocation Loc) {
12997   llvm_unreachable("Not supported in SIMD-only mode");
12998 }
12999 
/// emitProcBindClause is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
13000 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
13001                                              ProcBindKind ProcBind,
13002                                              SourceLocation Loc) {
13003   llvm_unreachable("Not supported in SIMD-only mode");
13004 }
13005 
/// getAddrOfThreadPrivate is not supported by the SIMD-only runtime; reaching
/// it trips llvm_unreachable, so no address is ever returned.
13006 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
13007                                                     const VarDecl *VD,
13008                                                     Address VDAddr,
13009                                                     SourceLocation Loc) {
13010   llvm_unreachable("Not supported in SIMD-only mode");
13011 }
13012 
/// emitThreadPrivateVarDefinition is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable, so no function is ever returned.
13013 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
13014     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13015     CodeGenFunction *CGF) {
13016   llvm_unreachable("Not supported in SIMD-only mode");
13017 }
13018 
/// getAddrOfArtificialThreadPrivate is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable, so no address is ever returned.
13019 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
13020     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13021   llvm_unreachable("Not supported in SIMD-only mode");
13022 }
13023 
/// emitFlush is not supported by the SIMD-only runtime; reaching it trips
/// llvm_unreachable. All parameters are unused.
13024 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
13025                                     ArrayRef<const Expr *> Vars,
13026                                     SourceLocation Loc,
13027                                     llvm::AtomicOrdering AO) {
13028   llvm_unreachable("Not supported in SIMD-only mode");
13029 }
13030 
/// emitTaskCall is not supported by the SIMD-only runtime; reaching it trips
/// llvm_unreachable. All parameters are unused.
13031 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
13032                                        const OMPExecutableDirective &D,
13033                                        llvm::Function *TaskFunction,
13034                                        QualType SharedsTy, Address Shareds,
13035                                        const Expr *IfCond,
13036                                        const OMPTaskDataTy &Data) {
13037   llvm_unreachable("Not supported in SIMD-only mode");
13038 }
13039 
/// emitTaskLoopCall is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
13040 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
13041     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
13042     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13043     const Expr *IfCond, const OMPTaskDataTy &Data) {
13044   llvm_unreachable("Not supported in SIMD-only mode");
13045 }
13046 
/// Forwards to CGOpenMPRuntime::emitReduction with unchanged arguments.
/// Asserts that \p Options.SimpleReduction is set, as only simple reductions
/// are expected here.
13047 void CGOpenMPSIMDRuntime::emitReduction(
13048     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
13049     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
13050     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13051   assert(Options.SimpleReduction && "Only simple reduction is expected.");
13052   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13053                                  ReductionOps, Options);
13054 }
13055 
/// emitTaskReductionInit is not supported by the SIMD-only runtime; reaching
/// it trips llvm_unreachable, so no value is ever returned.
13056 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
13057     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
13058     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13059   llvm_unreachable("Not supported in SIMD-only mode");
13060 }
13061 
/// emitTaskReductionFini is not supported by the SIMD-only runtime; reaching
/// it trips llvm_unreachable. All parameters are unused.
13062 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
13063                                                 SourceLocation Loc,
13064                                                 bool IsWorksharingReduction) {
13065   llvm_unreachable("Not supported in SIMD-only mode");
13066 }
13067 
/// emitTaskReductionFixups is not supported by the SIMD-only runtime; reaching
/// it trips llvm_unreachable. All parameters are unused.
13068 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
13069                                                   SourceLocation Loc,
13070                                                   ReductionCodeGen &RCG,
13071                                                   unsigned N) {
13072   llvm_unreachable("Not supported in SIMD-only mode");
13073 }
13074 
/// getTaskReductionItem is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable, so no address is ever returned.
13075 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
13076                                                   SourceLocation Loc,
13077                                                   llvm::Value *ReductionsPtr,
13078                                                   LValue SharedLVal) {
13079   llvm_unreachable("Not supported in SIMD-only mode");
13080 }
13081 
/// emitTaskwaitCall is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
13082 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
13083                                            SourceLocation Loc,
13084                                            const OMPTaskDataTy &Data) {
13085   llvm_unreachable("Not supported in SIMD-only mode");
13086 }
13087 
/// emitCancellationPointCall is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable. All parameters are unused.
13088 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
13089     CodeGenFunction &CGF, SourceLocation Loc,
13090     OpenMPDirectiveKind CancelRegion) {
13091   llvm_unreachable("Not supported in SIMD-only mode");
13092 }
13093 
/// emitCancelCall is not supported by the SIMD-only runtime; reaching it trips
/// llvm_unreachable. All parameters are unused.
13094 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
13095                                          SourceLocation Loc, const Expr *IfCond,
13096                                          OpenMPDirectiveKind CancelRegion) {
13097   llvm_unreachable("Not supported in SIMD-only mode");
13098 }
13099 
/// emitTargetOutlinedFunction is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable. The reference parameters \p OutlinedFn
/// and \p OutlinedFnID are never written.
13100 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
13101     const OMPExecutableDirective &D, StringRef ParentName,
13102     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13103     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13104   llvm_unreachable("Not supported in SIMD-only mode");
13105 }
13106 
/// emitTargetCall is not supported by the SIMD-only runtime; reaching it trips
/// llvm_unreachable. \p SizeEmitter is never invoked.
13107 void CGOpenMPSIMDRuntime::emitTargetCall(
13108     CodeGenFunction &CGF, const OMPExecutableDirective &D,
13109     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13110     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13111     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13112                                      const OMPLoopDirective &D)>
13113         SizeEmitter) {
13114   llvm_unreachable("Not supported in SIMD-only mode");
13115 }
13116 
/// emitTargetFunctions is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable, so no value is ever returned.
13117 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13118   llvm_unreachable("Not supported in SIMD-only mode");
13119 }
13120 
/// emitTargetGlobalVariable is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable, so no value is ever returned.
13121 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13122   llvm_unreachable("Not supported in SIMD-only mode");
13123 }
13124 
/// Always returns false; \p GD is not inspected. Unlike the neighbouring
/// SIMD-only overrides, this one may be called safely.
13125 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13126   return false;
13127 }
13128 
/// emitTeamsCall is not supported by the SIMD-only runtime; reaching it trips
/// llvm_unreachable. All parameters are unused.
13129 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13130                                         const OMPExecutableDirective &D,
13131                                         SourceLocation Loc,
13132                                         llvm::Function *OutlinedFn,
13133                                         ArrayRef<llvm::Value *> CapturedVars) {
13134   llvm_unreachable("Not supported in SIMD-only mode");
13135 }
13136 
/// emitNumTeamsClause is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
13137 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13138                                              const Expr *NumTeams,
13139                                              const Expr *ThreadLimit,
13140                                              SourceLocation Loc) {
13141   llvm_unreachable("Not supported in SIMD-only mode");
13142 }
13143 
/// emitTargetDataCalls is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. \p CodeGen is never invoked and \p Info is never
/// modified.
13144 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13145     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13146     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13147   llvm_unreachable("Not supported in SIMD-only mode");
13148 }
13149 
/// emitTargetDataStandAloneCall is not supported by the SIMD-only runtime;
/// reaching it trips llvm_unreachable. All parameters are unused.
13150 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13151     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13152     const Expr *Device) {
13153   llvm_unreachable("Not supported in SIMD-only mode");
13154 }
13155 
/// emitDoacrossInit is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
13156 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13157                                            const OMPLoopDirective &D,
13158                                            ArrayRef<Expr *> NumIterations) {
13159   llvm_unreachable("Not supported in SIMD-only mode");
13160 }
13161 
/// emitDoacrossOrdered is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable. All parameters are unused.
13162 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13163                                               const OMPDependClause *C) {
13164   llvm_unreachable("Not supported in SIMD-only mode");
13165 }
13166 
/// translateParameter is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable, so no declaration is ever returned.
13167 const VarDecl *
13168 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13169                                         const VarDecl *NativeParam) const {
13170   llvm_unreachable("Not supported in SIMD-only mode");
13171 }
13172 
/// getParameterAddress is not supported by the SIMD-only runtime; reaching it
/// trips llvm_unreachable, so no address is ever returned.
13173 Address
13174 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13175                                          const VarDecl *NativeParam,
13176                                          const VarDecl *TargetParam) const {
13177   llvm_unreachable("Not supported in SIMD-only mode");
13178 }
13179